Diffstat (limited to 'llvm/test/Transforms/InstCombine')
-rw-r--r--llvm/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll269
-rw-r--r--llvm/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2006-02-28-Crash.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2006-10-20-mask.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll145
-rw-r--r--llvm/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll42
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2007-05-14-Crash.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/2007-10-12-Crash.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/2007-10-28-stacksave.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2007-12-12-GEPScale.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll89
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-06-CastCrash.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-06-VoidCast.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-02-13-MulURem.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-02-23-MulSub.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll37
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-22-IDivVector.ll6
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2008-06-24-StackRestore.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-08-SubAnd.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-09-SubAndError.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-13-DivZero.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2008-07-16-fsub.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-08-05-And.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/2008-11-08-FCmp.ll63
-rw-r--r--llvm/test/Transforms/InstCombine/2008-11-27-IDivVector.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-05-i128-crash.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll315
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll75
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-31-InfIterate.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2009-01-31-Pressure.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll279
-rw-r--r--llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2009-03-24-InfLoop.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/2010-01-28-NegativeSRem.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/2010-11-23-Distributed.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/2011-02-14-InfLoop.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll64
-rw-r--r--llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll64
-rw-r--r--llvm/test/Transforms/InstCombine/2011-09-03-Trampoline.ll102
-rw-r--r--llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2012-02-13-FCmp.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/2012-04-24-vselect.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/2012-04-30-SRem.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll162
-rw-r--r--llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll71
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg2
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/tbl1.ll65
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll2407
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll2098
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg2
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll65
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll43
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/lit.local.cfg2
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/strcmp.ll153
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/strcpy.ll76
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/tbl1.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/ARM/vld1.ll118
-rw-r--r--llvm/test/Transforms/InstCombine/AddOverFlow.ll266
-rw-r--r--llvm/test/Transforms/InstCombine/CPP_min_max.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/ExtractCast.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/IntPtrCast.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/JavaCompare.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/LandingPadClauses.ll288
-rw-r--r--llvm/test/Transforms/InstCombine/NVPTX/lit.local.cfg2
-rw-r--r--llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll471
-rw-r--r--llvm/test/Transforms/InstCombine/OverlappingInsertvalues.ll36
-rw-r--r--llvm/test/Transforms/InstCombine/PR30597.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/PR37526.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll131
-rw-r--r--llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll165
-rw-r--r--llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg3
-rw-r--r--llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/README.txt4
-rw-r--r--llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll210
-rw-r--r--llvm/test/Transforms/InstCombine/X86/addcarry.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/X86/blend_x86.ll296
-rw-r--r--llvm/test/Transforms/InstCombine/X86/clmulqdq.ll266
-rw-r--r--llvm/test/Transforms/InstCombine/X86/lit.local.cfg2
-rw-r--r--llvm/test/Transforms/InstCombine/X86/pr2645-1.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll110
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-avx.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-avx2.ll109
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-avx512.ll3532
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll271
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-f16c.ll68
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-fma.ll116
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-insertps.ll140
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll328
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll458
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-muldq.ll281
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-pack.ll366
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll514
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-sse.ll611
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-sse2.ll458
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-sse41.ll142
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll408
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll110
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll3436
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll298
-rw-r--r--llvm/test/Transforms/InstCombine/X86/x86-xop.ll305
-rw-r--r--llvm/test/Transforms/InstCombine/abs-1.ll571
-rw-r--r--llvm/test/Transforms/InstCombine/abs_abs.ll1346
-rw-r--r--llvm/test/Transforms/InstCombine/add-sitofp.ll141
-rw-r--r--llvm/test/Transforms/InstCombine/add.ll980
-rw-r--r--llvm/test/Transforms/InstCombine/add2.ll474
-rw-r--r--llvm/test/Transforms/InstCombine/add3.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/add4.ll94
-rw-r--r--llvm/test/Transforms/InstCombine/addnegneg.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/addrspacecast.ll186
-rw-r--r--llvm/test/Transforms/InstCombine/adjust-for-minmax.ll486
-rw-r--r--llvm/test/Transforms/InstCombine/alias-recursion.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/align-2d-gep.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/align-addr.ll97
-rw-r--r--llvm/test/Transforms/InstCombine/align-attr.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/align-external.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/all-bits-shift.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/alloca-big.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll94
-rw-r--r--llvm/test/Transforms/InstCombine/alloca.ll179
-rw-r--r--llvm/test/Transforms/InstCombine/allocsize-32.ll29
-rw-r--r--llvm/test/Transforms/InstCombine/allocsize.ll154
-rw-r--r--llvm/test/Transforms/InstCombine/and-compare.ll77
-rw-r--r--llvm/test/Transforms/InstCombine/and-fcmp.ll1584
-rw-r--r--llvm/test/Transforms/InstCombine/and-narrow.ll192
-rw-r--r--llvm/test/Transforms/InstCombine/and-or-and.ll61
-rw-r--r--llvm/test/Transforms/InstCombine/and-or-icmps.ll255
-rw-r--r--llvm/test/Transforms/InstCombine/and-or-not.ll642
-rw-r--r--llvm/test/Transforms/InstCombine/and-or.ll132
-rw-r--r--llvm/test/Transforms/InstCombine/and-xor-merge.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/and-xor-or.ll343
-rw-r--r--llvm/test/Transforms/InstCombine/and.ll839
-rw-r--r--llvm/test/Transforms/InstCombine/and2.ll238
-rw-r--r--llvm/test/Transforms/InstCombine/apint-add.ll159
-rw-r--r--llvm/test/Transforms/InstCombine/apint-and-compare.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/apint-and-or-and.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/apint-and-xor-merge.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/apint-and.ll126
-rw-r--r--llvm/test/Transforms/InstCombine/apint-call-cast-target.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/apint-cast-and-cast.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/apint-cast-cast-to-and.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/apint-cast.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/apint-div1.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/apint-div2.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/apint-mul1.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/apint-mul2.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/apint-not.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/apint-or.ll56
-rw-r--r--llvm/test/Transforms/InstCombine/apint-rem1.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/apint-rem2.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/apint-select.ll118
-rw-r--r--llvm/test/Transforms/InstCombine/apint-shift-simplify.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/apint-shift.ll551
-rw-r--r--llvm/test/Transforms/InstCombine/apint-shl-trunc.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/apint-sub.ll191
-rw-r--r--llvm/test/Transforms/InstCombine/apint-xor1.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/apint-xor2.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/assoc-cast-assoc.ll77
-rw-r--r--llvm/test/Transforms/InstCombine/assume-loop-align.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/assume-redundant.ll81
-rw-r--r--llvm/test/Transforms/InstCombine/assume.ll358
-rw-r--r--llvm/test/Transforms/InstCombine/assume2.ll159
-rw-r--r--llvm/test/Transforms/InstCombine/atomic.ll333
-rw-r--r--llvm/test/Transforms/InstCombine/atomicrmw.ll298
-rw-r--r--llvm/test/Transforms/InstCombine/badmalloc.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/binop-cast.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/bit-checks.ll647
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-alias-function.ll239
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-bigendian.ll133
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-bitcast.ll84
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-sext-vector.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-store.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll40
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast.ll563
-rw-r--r--llvm/test/Transforms/InstCombine/bitreverse-hang.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/bittest.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/branch.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/broadcast.ll137
-rw-r--r--llvm/test/Transforms/InstCombine/bswap-fold.ll337
-rw-r--r--llvm/test/Transforms/InstCombine/bswap-known-bits.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/bswap.ll232
-rw-r--r--llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll117
-rw-r--r--llvm/test/Transforms/InstCombine/builtin-object-size-offset.ll58
-rw-r--r--llvm/test/Transforms/InstCombine/builtin-object-size-ptr.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/cabs-array.ll65
-rw-r--r--llvm/test/Transforms/InstCombine/cabs-discrete.ll59
-rw-r--r--llvm/test/Transforms/InstCombine/call-callconv.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/call-cast-attrs.ll29
-rw-r--r--llvm/test/Transforms/InstCombine/call-cast-target-inalloca.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/call-cast-target.ll89
-rw-r--r--llvm/test/Transforms/InstCombine/call-guard.ll110
-rw-r--r--llvm/test/Transforms/InstCombine/call-intrinsics.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/call.ll300
-rw-r--r--llvm/test/Transforms/InstCombine/call2.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/call_nonnull_arg.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll99
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll359
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll190
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll190
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll226
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll216
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll216
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll226
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll186
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll201
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll201
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll186
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll234
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll170
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll170
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll297
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll297
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll282
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll282
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll248
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll248
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll359
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll398
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll234
-rw-r--r--llvm/test/Transforms/InstCombine/canonicalize_branch.ll500
-rw-r--r--llvm/test/Transforms/InstCombine/cast-call-combine-prof.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/cast-call-combine.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll511
-rw-r--r--llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll709
-rw-r--r--llvm/test/Transforms/InstCombine/cast-mul-select.ll181
-rw-r--r--llvm/test/Transforms/InstCombine/cast-select.ll133
-rw-r--r--llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/cast-set.ll77
-rw-r--r--llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll204
-rw-r--r--llvm/test/Transforms/InstCombine/cast.ll1561
-rw-r--r--llvm/test/Transforms/InstCombine/cast_phi.ll135
-rw-r--r--llvm/test/Transforms/InstCombine/cast_ptr.ll129
-rw-r--r--llvm/test/Transforms/InstCombine/ceil.ll56
-rw-r--r--llvm/test/Transforms/InstCombine/clamp-to-minmax.ll607
-rw-r--r--llvm/test/Transforms/InstCombine/cmp-intrinsic.ll493
-rw-r--r--llvm/test/Transforms/InstCombine/compare-3way.ll395
-rw-r--r--llvm/test/Transforms/InstCombine/compare-alloca.ll97
-rw-r--r--llvm/test/Transforms/InstCombine/compare-signs.ll150
-rw-r--r--llvm/test/Transforms/InstCombine/compare-udiv.ll318
-rw-r--r--llvm/test/Transforms/InstCombine/compare-unescaped.ll164
-rw-r--r--llvm/test/Transforms/InstCombine/consecutive-fences.ll73
-rw-r--r--llvm/test/Transforms/InstCombine/constant-expr-datalayout.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll241
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-alias.ll40
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-compare.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-gep.ll92
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-iteration.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-libfunc.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-math.ll66
-rw-r--r--llvm/test/Transforms/InstCombine/constant-fold-shifts.ll36
-rw-r--r--llvm/test/Transforms/InstCombine/convergent.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/copysign.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/cos-1.ll175
-rw-r--r--llvm/test/Transforms/InstCombine/cos-2.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll122
-rw-r--r--llvm/test/Transforms/InstCombine/crash.ll398
-rw-r--r--llvm/test/Transforms/InstCombine/ctlz-cttz-bitreverse.ll69
-rw-r--r--llvm/test/Transforms/InstCombine/ctpop-bswap-bitreverse.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/ctpop.ll97
-rw-r--r--llvm/test/Transforms/InstCombine/dce-iterate.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/deadcode.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/debug-line.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo-dce.ll141
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo-dce2.ll70
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo-sink.ll78
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo-skip.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo-variables.ll122
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo.ll119
-rw-r--r--llvm/test/Transforms/InstCombine/debuginfo_add.ll114
-rw-r--r--llvm/test/Transforms/InstCombine/default-alignment.ll10
-rw-r--r--llvm/test/Transforms/InstCombine/demand_shrink_nsw.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll138
-rw-r--r--llvm/test/Transforms/InstCombine/demorgan.ll501
-rw-r--r--llvm/test/Transforms/InstCombine/disable-simplify-libcalls.ll335
-rw-r--r--llvm/test/Transforms/InstCombine/distribute.ll68
-rw-r--r--llvm/test/Transforms/InstCombine/div-shift-crash.ll101
-rw-r--r--llvm/test/Transforms/InstCombine/div-shift.ll204
-rw-r--r--llvm/test/Transforms/InstCombine/div.ll1049
-rw-r--r--llvm/test/Transforms/InstCombine/double-float-shrink-1.ll574
-rw-r--r--llvm/test/Transforms/InstCombine/double-float-shrink-2.ll654
-rw-r--r--llvm/test/Transforms/InstCombine/early_constfold_changes_IR.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll418
-rw-r--r--llvm/test/Transforms/InstCombine/enforce-known-alignment.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/err-rep-cold.ll77
-rw-r--r--llvm/test/Transforms/InstCombine/exact.ll336
-rw-r--r--llvm/test/Transforms/InstCombine/exp2-1.ll99
-rw-r--r--llvm/test/Transforms/InstCombine/exp2-2.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/extractelement.ll312
-rw-r--r--llvm/test/Transforms/InstCombine/extractinsert-tbaa.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/extractvalue.ll107
-rw-r--r--llvm/test/Transforms/InstCombine/fabs-libcall.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/fabs.ll420
-rw-r--r--llvm/test/Transforms/InstCombine/fadd-fsub-factor.ll473
-rw-r--r--llvm/test/Transforms/InstCombine/fadd.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/fast-math.ll931
-rw-r--r--llvm/test/Transforms/InstCombine/fcmp-select.ll116
-rw-r--r--llvm/test/Transforms/InstCombine/fcmp-special.ll244
-rw-r--r--llvm/test/Transforms/InstCombine/fcmp.ll463
-rw-r--r--llvm/test/Transforms/InstCombine/fdiv-cos-sin.ll131
-rw-r--r--llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll111
-rw-r--r--llvm/test/Transforms/InstCombine/fdiv.ll383
-rw-r--r--llvm/test/Transforms/InstCombine/ffs-1.ll193
-rw-r--r--llvm/test/Transforms/InstCombine/float-shrink-compare.ll473
-rw-r--r--llvm/test/Transforms/InstCombine/fls.ll54
-rw-r--r--llvm/test/Transforms/InstCombine/fma.ll277
-rw-r--r--llvm/test/Transforms/InstCombine/fmul-exp.ll85
-rw-r--r--llvm/test/Transforms/InstCombine/fmul-exp2.ll85
-rw-r--r--llvm/test/Transforms/InstCombine/fmul-pow.ll90
-rw-r--r--llvm/test/Transforms/InstCombine/fmul-sqrt.ll191
-rw-r--r--llvm/test/Transforms/InstCombine/fmul.ll778
-rw-r--r--llvm/test/Transforms/InstCombine/fneg.ll158
-rw-r--r--llvm/test/Transforms/InstCombine/fold-bin-operand.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/fold-calls.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/fold-fops-into-selects.ll71
-rw-r--r--llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll69
-rw-r--r--llvm/test/Transforms/InstCombine/fold-phi.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/fold-vector-select.ll150
-rw-r--r--llvm/test/Transforms/InstCombine/fold-vector-zero.ll35
-rw-r--r--llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/fpcast.ll125
-rw-r--r--llvm/test/Transforms/InstCombine/fpextend.ll283
-rw-r--r--llvm/test/Transforms/InstCombine/fpextend_x86.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/fprintf-1.ll98
-rw-r--r--llvm/test/Transforms/InstCombine/fputs-1.ll43
-rw-r--r--llvm/test/Transforms/InstCombine/fputs-opt-size.ll61
-rw-r--r--llvm/test/Transforms/InstCombine/fsh.ll638
-rw-r--r--llvm/test/Transforms/InstCombine/fsub.ll271
-rw-r--r--llvm/test/Transforms/InstCombine/fwrite-1.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/gc.relocate.ll59
-rw-r--r--llvm/test/Transforms/InstCombine/gep-addrspace.ll86
-rw-r--r--llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll187
-rw-r--r--llvm/test/Transforms/InstCombine/gep-custom-dl.ll154
-rw-r--r--llvm/test/Transforms/InstCombine/gep-sext.ll61
-rw-r--r--llvm/test/Transforms/InstCombine/gep-vector.ll72
-rw-r--r--llvm/test/Transforms/InstCombine/gepgep.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/gepphigep.ll186
-rw-r--r--llvm/test/Transforms/InstCombine/getelementptr-folding.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/getelementptr.ll945
-rw-r--r--llvm/test/Transforms/InstCombine/hoist_instr.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-add.ll465
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-bc-vec.ll127
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-custom-dl.ll247
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-div-constant.ll93
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-dom.ll350
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-logical.ll910
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-mul-zext.ll120
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-mul.ll249
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-range.ll150
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll356
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll92
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll3546
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-shr.ll509
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-sub.ll86
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll260
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll250
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll152
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll152
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll260
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll250
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-vec.ll282
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll219
-rw-r--r--llvm/test/Transforms/InstCombine/icmp.ll3477
-rw-r--r--llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/idioms.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/indexed-gep-compares.ll207
-rw-r--r--llvm/test/Transforms/InstCombine/inline-intrinsic-assert.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/inselt-binop.ll635
-rw-r--r--llvm/test/Transforms/InstCombine/insert-const-shuf.ll118
-rw-r--r--llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll427
-rw-r--r--llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll74
-rw-r--r--llvm/test/Transforms/InstCombine/int_sideeffect.ll14
-rw-r--r--llvm/test/Transforms/InstCombine/intersect-accessgroup.ll113
-rw-r--r--llvm/test/Transforms/InstCombine/intptr1.ll193
-rw-r--r--llvm/test/Transforms/InstCombine/intptr2.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/intptr3.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/intptr4.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/intptr5.ll56
-rw-r--r--llvm/test/Transforms/InstCombine/intptr6.ll90
-rw-r--r--llvm/test/Transforms/InstCombine/intptr7.ll58
-rw-r--r--llvm/test/Transforms/InstCombine/intrinsics.ll427
-rw-r--r--llvm/test/Transforms/InstCombine/invariant.group.ll150
-rw-r--r--llvm/test/Transforms/InstCombine/invariant.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll318
-rw-r--r--llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll421
-rw-r--r--llvm/test/Transforms/InstCombine/invoke.ll86
-rw-r--r--llvm/test/Transforms/InstCombine/isascii-1.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/isdigit-1.ll48
-rw-r--r--llvm/test/Transforms/InstCombine/known-never-nan.ll196
-rw-r--r--llvm/test/Transforms/InstCombine/known_align.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/lifetime-asan.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll94
-rw-r--r--llvm/test/Transforms/InstCombine/lifetime.ll92
-rw-r--r--llvm/test/Transforms/InstCombine/load-bitcast-select.ll104
-rw-r--r--llvm/test/Transforms/InstCombine/load-bitcast32.ll79
-rw-r--r--llvm/test/Transforms/InstCombine/load-bitcast64.ll78
-rw-r--r--llvm/test/Transforms/InstCombine/load-cmp.ll316
-rw-r--r--llvm/test/Transforms/InstCombine/load-combine-metadata-2.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/load-combine-metadata-3.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/load-combine-metadata-4.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/load-combine-metadata.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/load-select.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/load.ll302
-rw-r--r--llvm/test/Transforms/InstCombine/load3.ll46
-rw-r--r--llvm/test/Transforms/InstCombine/load_combine_aa.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/loadstore-alignment.ll90
-rw-r--r--llvm/test/Transforms/InstCombine/loadstore-metadata.ll150
-rw-r--r--llvm/test/Transforms/InstCombine/log-pow-nofastmath.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/log-pow.ll62
-rw-r--r--llvm/test/Transforms/InstCombine/logical-select.ll637
-rw-r--r--llvm/test/Transforms/InstCombine/lower-dbg-declare.ll183
-rw-r--r--llvm/test/Transforms/InstCombine/lshr-phi.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/lshr.ll205
-rw-r--r--llvm/test/Transforms/InstCombine/malloc-free-delete.ll288
-rw-r--r--llvm/test/Transforms/InstCombine/masked-merge-add.ll415
-rw-r--r--llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll509
-rw-r--r--llvm/test/Transforms/InstCombine/masked-merge-or.ll414
-rw-r--r--llvm/test/Transforms/InstCombine/masked-merge-xor.ll414
-rw-r--r--llvm/test/Transforms/InstCombine/masked_intrinsics.ll261
-rw-r--r--llvm/test/Transforms/InstCombine/max-of-nots.ll360
-rw-r--r--llvm/test/Transforms/InstCombine/max_known_bits.ll95
-rw-r--r--llvm/test/Transforms/InstCombine/maximum.ll292
-rw-r--r--llvm/test/Transforms/InstCombine/maxnum.ll293
-rw-r--r--llvm/test/Transforms/InstCombine/mem-gep-zidx.ll60
-rw-r--r--llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll62
-rw-r--r--llvm/test/Transforms/InstCombine/memchr.ll204
-rw-r--r--llvm/test/Transforms/InstCombine/memcmp-1.ll151
-rw-r--r--llvm/test/Transforms/InstCombine/memcmp-2.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll80
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy-1.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy-2.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy-addrspace.ll125
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy-from-global.ll259
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy-to-load.ll87
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy_chk-1.ll77
-rw-r--r--llvm/test/Transforms/InstCombine/memcpy_chk-2.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/memmove-1.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/memmove-2.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/memmove.ll71
-rw-r--r--llvm/test/Transforms/InstCombine/memmove_chk-1.ll64
-rw-r--r--llvm/test/Transforms/InstCombine/memmove_chk-2.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/memset-1.ll108
-rw-r--r--llvm/test/Transforms/InstCombine/memset-2.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/memset.ll37
-rw-r--r--llvm/test/Transforms/InstCombine/memset2.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/memset_chk-1.ll130
-rw-r--r--llvm/test/Transforms/InstCombine/memset_chk-2.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/merge-icmp.ll29
-rw-r--r--llvm/test/Transforms/InstCombine/min-positive.ll101
-rw-r--r--llvm/test/Transforms/InstCombine/minimum.ll317
-rw-r--r--llvm/test/Transforms/InstCombine/minmax-demandbits.ll236
-rw-r--r--llvm/test/Transforms/InstCombine/minmax-fold.ll1450
-rw-r--r--llvm/test/Transforms/InstCombine/minmax-fp.ll257
-rw-r--r--llvm/test/Transforms/InstCombine/minnum.ll318
-rw-r--r--llvm/test/Transforms/InstCombine/misc-2002.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/mul-masked-bits.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/mul.ll519
-rw-r--r--llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll112
-rw-r--r--llvm/test/Transforms/InstCombine/multi-use-or.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/musttail-thunk.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/narrow-math.ll630
-rw-r--r--llvm/test/Transforms/InstCombine/narrow-switch.ll262
-rw-r--r--llvm/test/Transforms/InstCombine/narrow.ll239
-rw-r--r--llvm/test/Transforms/InstCombine/no-negzero.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/no_cgscc_assert.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/no_sink_instruction.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/non-integral-pointers.ll92
-rw-r--r--llvm/test/Transforms/InstCombine/nonnull-attribute.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/not.ll253
-rw-r--r--llvm/test/Transforms/InstCombine/nothrow.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/nsw.ll132
-rw-r--r--llvm/test/Transforms/InstCombine/obfuscated_splat.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/objsize-64.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/objsize-address-space.ll80
-rw-r--r--llvm/test/Transforms/InstCombine/objsize-noverify.ll43
-rw-r--r--llvm/test/Transforms/InstCombine/objsize.ll303
-rw-r--r--llvm/test/Transforms/InstCombine/odr-linkage.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/onehot_merge.ll111
-rw-r--r--llvm/test/Transforms/InstCombine/opaque.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/operand-complexity.ll136
-rw-r--r--llvm/test/Transforms/InstCombine/or-fcmp.ll1556
-rw-r--r--llvm/test/Transforms/InstCombine/or-shifted-masks.ll221
-rw-r--r--llvm/test/Transforms/InstCombine/or-xor.ll416
-rw-r--r--llvm/test/Transforms/InstCombine/or.ll843
-rw-r--r--llvm/test/Transforms/InstCombine/osx-names.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/out-of-bounds-indexes.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/overflow-mul.ll199
-rw-r--r--llvm/test/Transforms/InstCombine/overflow.ll173
-rw-r--r--llvm/test/Transforms/InstCombine/phi-load-metadata-2.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/phi-load-metadata-3.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/phi-load-metadata-dominance.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/phi-load-metadata.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/phi-merge-gep.ll102
-rw-r--r--llvm/test/Transforms/InstCombine/phi-preserve-ir-flags.ll89
-rw-r--r--llvm/test/Transforms/InstCombine/phi-select-constant.ll105
-rw-r--r--llvm/test/Transforms/InstCombine/phi-timeout.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/phi.ll881
-rw-r--r--llvm/test/Transforms/InstCombine/pow-1.ll488
-rw-r--r--llvm/test/Transforms/InstCombine/pow-2.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/pow-3.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/pow-4.ll225
-rw-r--r--llvm/test/Transforms/InstCombine/pow-cbrt.ll117
-rw-r--r--llvm/test/Transforms/InstCombine/pow-exp-nofastmath.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/pow-exp.ll222
-rw-r--r--llvm/test/Transforms/InstCombine/pow-sqrt.ll297
-rw-r--r--llvm/test/Transforms/InstCombine/pr12251.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/pr12338.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/pr17827.ll115
-rw-r--r--llvm/test/Transforms/InstCombine/pr19420.ll89
-rw-r--r--llvm/test/Transforms/InstCombine/pr20079.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/pr20678.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/pr21199.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/pr21210.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/pr21651.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/pr21891.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/pr23751.ll13
-rw-r--r--llvm/test/Transforms/InstCombine/pr23809.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/pr24354.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/pr24605.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/pr25342.ll93
-rw-r--r--llvm/test/Transforms/InstCombine/pr25745.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/pr2645-0.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/pr26992.ll37
-rw-r--r--llvm/test/Transforms/InstCombine/pr26993.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/pr27236.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/pr27332.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/pr27343.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/pr27703.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/pr27996.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/pr28143.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/pr28725.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/pr2996.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/pr30929.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/pr32686.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/pr33453.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/pr34349.ll27
-rw-r--r--llvm/test/Transforms/InstCombine/pr34627.ll11
-rw-r--r--llvm/test/Transforms/InstCombine/pr35515.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/pr36362.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/pr38677.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/pr38897.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/pr38915.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/pr38984.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/pr39177.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/pr39908.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/pr41164.ll36
-rw-r--r--llvm/test/Transforms/InstCombine/prefetch-load.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/preserve-sminmax.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/preserved-analyses.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll41
-rw-r--r--llvm/test/Transforms/InstCombine/printf-1.ll131
-rw-r--r--llvm/test/Transforms/InstCombine/printf-2.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/printf-3.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/ptr-int-cast.ll60
-rw-r--r--llvm/test/Transforms/InstCombine/puts-1.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/range-check.ll159
-rw-r--r--llvm/test/Transforms/InstCombine/readnone-maythrow.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/realloc.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/rem.ll672
-rw-r--r--llvm/test/Transforms/InstCombine/rotate.ll705
-rw-r--r--llvm/test/Transforms/InstCombine/round.ll90
-rw-r--r--llvm/test/Transforms/InstCombine/sadd-with-overflow.ll124
-rw-r--r--llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll49
-rw-r--r--llvm/test/Transforms/InstCombine/saturating-add-sub.ll1462
-rw-r--r--llvm/test/Transforms/InstCombine/scalarization.ll335
-rw-r--r--llvm/test/Transforms/InstCombine/sdiv-1.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/sdiv-2.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll91
-rw-r--r--llvm/test/Transforms/InstCombine/sdiv-guard.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/select-2.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/select-binop-cmp.ll1088
-rw-r--r--llvm/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll111
-rw-r--r--llvm/test/Transforms/InstCombine/select-bitext.ll619
-rw-r--r--llvm/test/Transforms/InstCombine/select-cmp-br.ll263
-rw-r--r--llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll459
-rw-r--r--llvm/test/Transforms/InstCombine/select-cmpxchg.ll39
-rw-r--r--llvm/test/Transforms/InstCombine/select-crash-noverify.ll19
-rw-r--r--llvm/test/Transforms/InstCombine/select-crash.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/select-extractelement.ll146
-rw-r--r--llvm/test/Transforms/InstCombine/select-gep.ll152
-rw-r--r--llvm/test/Transforms/InstCombine/select-icmp-and.ll620
-rw-r--r--llvm/test/Transforms/InstCombine/select-load-call.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/select-obo-peo-ops.ll1143
-rw-r--r--llvm/test/Transforms/InstCombine/select-of-bittest.ll654
-rw-r--r--llvm/test/Transforms/InstCombine/select-pr39595.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/select-select.ll34
-rw-r--r--llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll1451
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll1506
-rw-r--r--llvm/test/Transforms/InstCombine/select_arithmetic.ll94
-rw-r--r--llvm/test/Transforms/InstCombine/select_meta.ll345
-rw-r--r--llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll300
-rw-r--r--llvm/test/Transforms/InstCombine/set.ll392
-rw-r--r--llvm/test/Transforms/InstCombine/setcc-strength-reduce.ll37
-rw-r--r--llvm/test/Transforms/InstCombine/sext.ll242
-rw-r--r--llvm/test/Transforms/InstCombine/shift-add.ll74
-rw-r--r--llvm/test/Transforms/InstCombine/shift-shift.ll75
-rw-r--r--llvm/test/Transforms/InstCombine/shift-sra.ll217
-rw-r--r--llvm/test/Transforms/InstCombine/shift.ll1539
-rw-r--r--llvm/test/Transforms/InstCombine/should-change-type.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll144
-rw-r--r--llvm/test/Transforms/InstCombine/shuffle_select.ll1466
-rw-r--r--llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/shufflevec-constant.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/sign-test-and-or.ll173
-rw-r--r--llvm/test/Transforms/InstCombine/signed-comparison.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/signed-truncation-check.ll621
-rw-r--r--llvm/test/Transforms/InstCombine/signext.ll102
-rw-r--r--llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll84
-rw-r--r--llvm/test/Transforms/InstCombine/simplify-libcalls-erased.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/simplify-libcalls.ll180
-rw-r--r--llvm/test/Transforms/InstCombine/sincospi.ll101
-rw-r--r--llvm/test/Transforms/InstCombine/sink-alloca.ll52
-rw-r--r--llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll58
-rw-r--r--llvm/test/Transforms/InstCombine/sink_instruction.ll79
-rw-r--r--llvm/test/Transforms/InstCombine/sitofp.ll218
-rw-r--r--llvm/test/Transforms/InstCombine/smax-icmp.ll234
-rw-r--r--llvm/test/Transforms/InstCombine/smin-icmp.ll333
-rw-r--r--llvm/test/Transforms/InstCombine/snprintf.ll138
-rw-r--r--llvm/test/Transforms/InstCombine/sprintf-1.ll100
-rw-r--r--llvm/test/Transforms/InstCombine/sprintf-void.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/sqrt-nofast.ll25
-rw-r--r--llvm/test/Transforms/InstCombine/sqrt.ll54
-rw-r--r--llvm/test/Transforms/InstCombine/srem-canonicalize.ll63
-rw-r--r--llvm/test/Transforms/InstCombine/srem-simplify-bug.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/srem1.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/ssub-with-overflow.ll162
-rw-r--r--llvm/test/Transforms/InstCombine/stack-overalign.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll47
-rw-r--r--llvm/test/Transforms/InstCombine/stacksaverestore.ll112
-rw-r--r--llvm/test/Transforms/InstCombine/statepoint.ll52
-rw-r--r--llvm/test/Transforms/InstCombine/store-load-unaliased-gep.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/store.ll309
-rw-r--r--llvm/test/Transforms/InstCombine/storemerge-dbg.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/stpcpy-1.ll46
-rw-r--r--llvm/test/Transforms/InstCombine/stpcpy-2.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll103
-rw-r--r--llvm/test/Transforms/InstCombine/stpcpy_chk-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/str-int-2.ll131
-rw-r--r--llvm/test/Transforms/InstCombine/str-int.ll134
-rw-r--r--llvm/test/Transforms/InstCombine/strcat-1.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/strcat-2.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/strcat-3.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/strchr-1.ll96
-rw-r--r--llvm/test/Transforms/InstCombine/strchr-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/strcmp-1.ll104
-rw-r--r--llvm/test/Transforms/InstCombine/strcmp-2.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/strcmp-memcmp.ll560
-rw-r--r--llvm/test/Transforms/InstCombine/strcpy-1.ll45
-rw-r--r--llvm/test/Transforms/InstCombine/strcpy-2.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/strcpy_chk-1.ll103
-rw-r--r--llvm/test/Transforms/InstCombine/strcpy_chk-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/strcpy_chk-64.ll31
-rw-r--r--llvm/test/Transforms/InstCombine/strcspn-1.ll57
-rw-r--r--llvm/test/Transforms/InstCombine/strcspn-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/strlen-1.ll204
-rw-r--r--llvm/test/Transforms/InstCombine/strlen-2.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/strncat-1.ll37
-rw-r--r--llvm/test/Transforms/InstCombine/strncat-2.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/strncat-3.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/strncmp-1.ll99
-rw-r--r--llvm/test/Transforms/InstCombine/strncmp-2.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/strncmp-wrong-datalayout.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/strncpy-1.ll95
-rw-r--r--llvm/test/Transforms/InstCombine/strncpy-2.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/strncpy_chk-1.ll71
-rw-r--r--llvm/test/Transforms/InstCombine/strncpy_chk-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/strpbrk-1.ll68
-rw-r--r--llvm/test/Transforms/InstCombine/strpbrk-2.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/strrchr-1.ll65
-rw-r--r--llvm/test/Transforms/InstCombine/strrchr-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/strspn-1.ll56
-rw-r--r--llvm/test/Transforms/InstCombine/strstr-1.ll65
-rw-r--r--llvm/test/Transforms/InstCombine/strstr-2.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/strto-1.ll82
-rw-r--r--llvm/test/Transforms/InstCombine/struct-assign-tbaa-new.ll53
-rw-r--r--llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll46
-rw-r--r--llvm/test/Transforms/InstCombine/sub-minmax.ll355
-rw-r--r--llvm/test/Transforms/InstCombine/sub-not.ll145
-rw-r--r--llvm/test/Transforms/InstCombine/sub-xor.ll50
-rw-r--r--llvm/test/Transforms/InstCombine/sub.ll1294
-rw-r--r--llvm/test/Transforms/InstCombine/switch-constant-expr.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/switch-truncate-crash.ll7
-rw-r--r--llvm/test/Transforms/InstCombine/tan-nofastmath.ll17
-rw-r--r--llvm/test/Transforms/InstCombine/tan.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/tbaa-store-to-load.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/toascii-1.ll59
-rw-r--r--llvm/test/Transforms/InstCombine/token.ll106
-rw-r--r--llvm/test/Transforms/InstCombine/trunc-binop-ext.ll317
-rw-r--r--llvm/test/Transforms/InstCombine/trunc.ll626
-rw-r--r--llvm/test/Transforms/InstCombine/type_pun.ll155
-rw-r--r--llvm/test/Transforms/InstCombine/uadd-with-overflow.ll126
-rw-r--r--llvm/test/Transforms/InstCombine/uaddo.ll182
-rw-r--r--llvm/test/Transforms/InstCombine/udiv-simplify.ll106
-rw-r--r--llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll38
-rw-r--r--llvm/test/Transforms/InstCombine/udivrem-change-width.ll288
-rw-r--r--llvm/test/Transforms/InstCombine/umax-icmp.ll234
-rw-r--r--llvm/test/Transforms/InstCombine/umin-icmp.ll234
-rw-r--r--llvm/test/Transforms/InstCombine/unavailable-debug.ll81
-rw-r--r--llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll289
-rw-r--r--llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll350
-rw-r--r--llvm/test/Transforms/InstCombine/unlocked-stdio-mingw.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/unlocked-stdio.ll224
-rw-r--r--llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll125
-rw-r--r--llvm/test/Transforms/InstCombine/unpack-fca.ll239
-rw-r--r--llvm/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll459
-rw-r--r--llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll160
-rw-r--r--llvm/test/Transforms/InstCombine/urem-simplify-bug.ll36
-rw-r--r--llvm/test/Transforms/InstCombine/vararg.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/vec-binop-select.ll275
-rw-r--r--llvm/test/Transforms/InstCombine/vec_demanded_elts.ll640
-rw-r--r--llvm/test/Transforms/InstCombine/vec_extract_2elts.ll12
-rw-r--r--llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll26
-rw-r--r--llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll16
-rw-r--r--llvm/test/Transforms/InstCombine/vec_insertelt.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/vec_phi_extract.ll107
-rw-r--r--llvm/test/Transforms/InstCombine/vec_sext.ll63
-rw-r--r--llvm/test/Transforms/InstCombine/vec_shuffle.ll1142
-rw-r--r--llvm/test/Transforms/InstCombine/vector-casts.ll413
-rw-r--r--llvm/test/Transforms/InstCombine/vector-concat-binop.ll282
-rw-r--r--llvm/test/Transforms/InstCombine/vector-mul.ll445
-rw-r--r--llvm/test/Transforms/InstCombine/vector-type.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/vector-udiv.ll99
-rw-r--r--llvm/test/Transforms/InstCombine/vector-urem.ll78
-rw-r--r--llvm/test/Transforms/InstCombine/vector-xor.ll281
-rw-r--r--llvm/test/Transforms/InstCombine/vector_gep1.ll64
-rw-r--r--llvm/test/Transforms/InstCombine/vector_gep2.ll44
-rw-r--r--llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll93
-rw-r--r--llvm/test/Transforms/InstCombine/volatile_store.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/wcslen-1.ll222
-rw-r--r--llvm/test/Transforms/InstCombine/wcslen-2.ll21
-rw-r--r--llvm/test/Transforms/InstCombine/wcslen-3.ll197
-rw-r--r--llvm/test/Transforms/InstCombine/wcslen-4.ll20
-rw-r--r--llvm/test/Transforms/InstCombine/weak-symbols.ll33
-rw-r--r--llvm/test/Transforms/InstCombine/win-math.ll335
-rw-r--r--llvm/test/Transforms/InstCombine/with_overflow.ll606
-rw-r--r--llvm/test/Transforms/InstCombine/xor-icmps.ll173
-rw-r--r--llvm/test/Transforms/InstCombine/xor-undef.ll6
-rw-r--r--llvm/test/Transforms/InstCombine/xor.ll914
-rw-r--r--llvm/test/Transforms/InstCombine/xor2.ll513
-rw-r--r--llvm/test/Transforms/InstCombine/zero-point-zero-add.ll24
-rw-r--r--llvm/test/Transforms/InstCombine/zeroext-and-reduce.ll15
-rw-r--r--llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll404
-rw-r--r--llvm/test/Transforms/InstCombine/zext-fold.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/zext-or-icmp.ll51
-rw-r--r--llvm/test/Transforms/InstCombine/zext-phi.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/zext.ll174
892 files changed, 146380 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll b/llvm/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
new file mode 100644
index 00000000000..19010d22d72
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | grep 4294967295
+
+define i64 @test(i64 %Val) {
+ %tmp.3 = trunc i64 %Val to i32 ; <i32> [#uses=1]
+ %tmp.8 = zext i32 %tmp.3 to i64 ; <i64> [#uses=1]
+ ret i64 %tmp.8
+}
+
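The RUN line above greps for 4294967295 because the trunc/zext round trip keeps only the low 32 bits of %Val, which instcombine is expected to express as an explicit mask. A minimal sketch of that equivalent form, assuming the usual fold (the function name is illustrative, not part of the test):

define i64 @sketch_masked(i64 %Val) {
  ; trunc to i32 followed by zext back to i64 preserves exactly the low
  ; 32 bits, i.e. it behaves like masking with 0xFFFFFFFF (4294967295).
  %masked = and i64 %Val, 4294967295
  ret i64 %masked
}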
diff --git a/llvm/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll b/llvm/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
new file mode 100644
index 00000000000..8645249b7ca
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+
+@X = global i32 5 ; <i32*> [#uses=1]
+
+define i64 @test() {
+ %C = add i64 1, 2 ; <i64> [#uses=1]
+ %V = add i64 ptrtoint (i32* @X to i64), %C ; <i64> [#uses=1]
+ ret i64 %V
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll b/llvm/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
new file mode 100644
index 00000000000..154f3ba65e9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
@@ -0,0 +1,16 @@
+; This testcase causes an infinite loop in the instruction combiner,
+; because it thinks that the constant value is a "not" expression... and
+; constantly inverts the branch back and forth.
+;
+; RUN: opt < %s -instcombine -disable-output
+
+define i8 @test19(i1 %c) {
+ br i1 true, label %True, label %False
+
+True: ; preds = %0
+ ret i8 1
+
+False: ; preds = %0
+ ret i8 3
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll b/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
new file mode 100644
index 00000000000..b3815458089
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
@@ -0,0 +1,44 @@
+;
+; Test: ExternalConstant
+;
+; Description:
+; This regression test helps check whether the instruction combining
+; optimization pass correctly handles global variables which are marked
+; as external and constant.
+;
+; If a problem occurs, we should die on an assert(). Otherwise, we
+; should pass through the optimizer without failure.
+;
+; Extra code:
+; RUN: opt < %s -instcombine
+; END.
+
+target datalayout = "e-p:32:32"
+@silly = external constant i32 ; <i32*> [#uses=1]
+
+declare void @bzero(i8*, i32)
+
+declare void @bcopy(i8*, i8*, i32)
+
+declare i32 @bcmp(i8*, i8*, i32)
+
+declare i32 @fputs(i8*, i8*)
+
+declare i32 @fputs_unlocked(i8*, i8*)
+
+define i32 @function(i32 %a.1) {
+entry:
+ %a.0 = alloca i32 ; <i32*> [#uses=2]
+ %result = alloca i32 ; <i32*> [#uses=2]
+ store i32 %a.1, i32* %a.0
+ %tmp.0 = load i32, i32* %a.0 ; <i32> [#uses=1]
+ %tmp.1 = load i32, i32* @silly ; <i32> [#uses=1]
+ %tmp.2 = add i32 %tmp.0, %tmp.1 ; <i32> [#uses=1]
+ store i32 %tmp.2, i32* %result
+ br label %return
+
+return: ; preds = %entry
+ %tmp.3 = load i32, i32* %result ; <i32> [#uses=1]
+ ret i32 %tmp.3
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll b/llvm/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
new file mode 100644
index 00000000000..50b9fdb1645
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
@@ -0,0 +1,21 @@
+; This testcase can be simplified by "realizing" that alloca can never return
+; null.
+; RUN: opt < %s -instcombine -simplifycfg -S | FileCheck %s
+; CHECK-NOT: br
+
+declare i32 @bitmap_clear(...)
+
+define i32 @oof() {
+entry:
+ %live_head = alloca i32 ; <i32*> [#uses=2]
+ %tmp.1 = icmp ne i32* %live_head, null ; <i1> [#uses=1]
+ br i1 %tmp.1, label %then, label %UnifiedExitNode
+
+then: ; preds = %entry
+ %tmp.4 = call i32 (...) @bitmap_clear( i32* %live_head ) ; <i32> [#uses=0]
+ br label %UnifiedExitNode
+
+UnifiedExitNode: ; preds = %then, %entry
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
new file mode 100644
index 00000000000..3e0cf1247d9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -S | grep load
+
+define void @test(i32* %P) {
+ ; Dead but not deletable!
+ %X = load volatile i32, i32* %P ; <i32> [#uses=0]
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll b/llvm/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
new file mode 100644
index 00000000000..1fc8aa7c278
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -disable-output
+
+declare i32* @bar()
+
+define float* @foo() personality i32 (...)* @__gxx_personality_v0 {
+ %tmp.11 = invoke float* bitcast (i32* ()* @bar to float* ()*)( )
+ to label %invoke_cont unwind label %X ; <float*> [#uses=1]
+
+invoke_cont: ; preds = %0
+ ret float* %tmp.11
+
+X: ; preds = %0
+ %exn = landingpad {i8*, i32}
+ cleanup
+ ret float* null
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll b/llvm/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
new file mode 100644
index 00000000000..aff39f8bf97
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
@@ -0,0 +1,13 @@
+; The cast in this testcase is not eliminable on a 32-bit target!
+; RUN: opt < %s -instcombine -S | grep inttoptr
+
+target datalayout = "e-p:32:32"
+
+declare void @foo(...)
+
+define void @test(i64 %X) {
+ %Y = inttoptr i64 %X to i32* ; <i32*> [#uses=1]
+ call void (...) @foo( i32* %Y )
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll b/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
new file mode 100644
index 00000000000..7471d8b3462
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
@@ -0,0 +1,31 @@
+; Test for a problem afflicting several C++ programs in the testsuite. The
+; instcombine pass is trying to get rid of the cast in the invoke instruction,
+; inserting a cast of the return value after the PHI instruction, even though
+; the PHI instruction is the user of that cast. This is bad: because of the
+; semantics of the invoke instruction, we really cannot perform this
+; transformation at all, at least not without splitting the critical edge.
+;
+; RUN: opt < %s -instcombine -disable-output
+
+declare i8* @test()
+
+define i32 @foo() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ br i1 true, label %cont, label %call
+
+call: ; preds = %entry
+ %P = invoke i32* bitcast (i8* ()* @test to i32* ()*)( )
+ to label %cont unwind label %N ; <i32*> [#uses=1]
+
+cont: ; preds = %call, %entry
+ %P2 = phi i32* [ %P, %call ], [ null, %entry ] ; <i32*> [#uses=1]
+ %V = load i32, i32* %P2 ; <i32> [#uses=1]
+ ret i32 %V
+
+N: ; preds = %call
+ %exn = landingpad {i8*, i32}
+ cleanup
+ ret i32 0
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll b/llvm/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
new file mode 100644
index 00000000000..a08e3a884ce
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | not grep 34
+
+define i32 @test(i32 %X) {
+ ; Do not fold into shr X, 34, as this uses undefined behavior!
+ %Y = ashr i32 %X, 17 ; <i32> [#uses=1]
+ %Z = ashr i32 %Y, 17 ; <i32> [#uses=1]
+ ret i32 %Z
+}
+
+define i32 @test2(i32 %X) {
+ ; Do not fold into shl X, 34, as this uses undefined behavior!
+ %Y = shl i32 %X, 17 ; <i32> [#uses=1]
+ %Z = shl i32 %Y, 17 ; <i32> [#uses=1]
+ ret i32 %Z
+}
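Both functions above shift twice by 17, and the RUN line checks that no shift amount of 34 appears: in LLVM IR a shift by an amount greater than or equal to the bit width returns poison, so merging the two shifts into a single 34-bit shift of an i32 would be wrong. Equivalent legal forms, shown as a hedged sketch (function names are illustrative):

define i32 @sketch_two_ashr(i32 %X) {
  ; Two arithmetic right shifts by 17 leave every bit equal to the sign
  ; bit, the same as a single shift by 31.
  %r = ashr i32 %X, 31
  ret i32 %r
}

define i32 @sketch_two_shl(i32 %X) {
  ; Two left shifts by 17 push every original bit out of the i32, so the
  ; result is simply zero.
  ret i32 0
}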
diff --git a/llvm/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll b/llvm/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
new file mode 100644
index 00000000000..ff20d7db948
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
@@ -0,0 +1,13 @@
+; This testcase caused the combiner to go into an infinite loop, moving the
+; cast back and forth, changing the seteq to operate on int vs uint and back.
+
+; RUN: opt < %s -instcombine -disable-output
+
+define i1 @test(i32 %A, i32 %B) {
+ %C = sub i32 0, %A ; <i32> [#uses=1]
+ %Cc = bitcast i32 %C to i32 ; <i32> [#uses=1]
+ %D = sub i32 0, %B ; <i32> [#uses=1]
+ %E = icmp eq i32 %Cc, %D ; <i1> [#uses=1]
+ ret i1 %E
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll b/llvm/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
new file mode 100644
index 00000000000..84f9bad69d8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define i32 @test() {
+ ret i32 0
+
+Loop: ; preds = %Loop
+ %X = add i32 %X, 1 ; <i32> [#uses=1]
+ br label %Loop
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll b/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
new file mode 100644
index 00000000000..ff855dc13e5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%Ty = type opaque
+
+define i32 @test(%Ty* %X) {
+ %Y = bitcast %Ty* %X to i32* ; <i32*> [#uses=1]
+ %Z = load i32, i32* %Y ; <i32> [#uses=1]
+ ret i32 %Z
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll b/llvm/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
new file mode 100644
index 00000000000..819260b60b1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -disable-output
+
+@p = weak global i32 0 ; <i32*> [#uses=1]
+
+define i32 @test(i32 %x) {
+ %y = mul i32 %x, ptrtoint (i32* @p to i32) ; <i32> [#uses=1]
+ ret i32 %y
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll b/llvm/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
new file mode 100644
index 00000000000..f3e5d77c0c8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine
+
+; This testcase should not send the instcombiner into an infinite loop!
+
+define i32 @test(i32 %X) {
+ %Y = srem i32 %X, 0 ; <i32> [#uses=1]
+ ret i32 %Y
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll b/llvm/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
new file mode 100644
index 00000000000..4233797b6d7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep "ret i1 false"
+
+define i1 @test(i1 %V) {
+ %Y = icmp ult i1 %V, false ; <i1> [#uses=1]
+ ret i1 %Y
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
new file mode 100644
index 00000000000..10122e48ab6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -mem2reg -S | \
+; RUN: not grep "i32 1"
+
+; When propagating the load through the select, make sure that the load is
+; inserted where the original load was, not where the select is. Not doing
+; so could produce incorrect results!
+
+define i32 @test(i1 %C) {
+ %X = alloca i32 ; <i32*> [#uses=3]
+ %X2 = alloca i32 ; <i32*> [#uses=2]
+ store i32 1, i32* %X
+ store i32 2, i32* %X2
+ %Y = select i1 %C, i32* %X, i32* %X2 ; <i32*> [#uses=1]
+ store i32 3, i32* %X
+ %Z = load i32, i32* %Y ; <i32> [#uses=1]
+ ret i32 %Z
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
new file mode 100644
index 00000000000..981a4f301a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -mem2reg -simplifycfg | \
+; RUN: llvm-dis | grep -v store | not grep "i32 1"
+
+; Test to make sure that instcombine does not accidentally propagate the load
+; into the PHI, which would break the program.
+
+define i32 @test(i1 %C) {
+entry:
+ %X = alloca i32 ; <i32*> [#uses=3]
+ %X2 = alloca i32 ; <i32*> [#uses=2]
+ store i32 1, i32* %X
+ store i32 2, i32* %X2
+ br i1 %C, label %cond_true.i, label %cond_continue.i
+
+cond_true.i: ; preds = %entry
+ br label %cond_continue.i
+
+cond_continue.i: ; preds = %cond_true.i, %entry
+ %mem_tmp.i.0 = phi i32* [ %X, %cond_true.i ], [ %X2, %entry ] ; <i32*> [#uses=1]
+ store i32 3, i32* %X
+ %tmp.3 = load i32, i32* %mem_tmp.i.0 ; <i32> [#uses=1]
+ ret i32 %tmp.3
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll b/llvm/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
new file mode 100644
index 00000000000..27c823b9e63
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep -- -65536
+
+define i1 @test(i32 %tmp.124) {
+ %tmp.125 = shl i32 %tmp.124, 8 ; <i32> [#uses=1]
+ %tmp.126.mask = and i32 %tmp.125, -16777216 ; <i32> [#uses=1]
+ %tmp.128 = icmp eq i32 %tmp.126.mask, 167772160 ; <i1> [#uses=1]
+ ret i1 %tmp.128
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll b/llvm/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
new file mode 100644
index 00000000000..730fdc26aab
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | not grep and
+
+define i8 @test21(i8 %A) {
+ ;; sign extend
+ %C = ashr i8 %A, 7 ; <i8> [#uses=1]
+ ;; chop off sign
+ %D = and i8 %C, 1 ; <i8> [#uses=1]
+ ret i8 %D
+}
+
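The "not grep and" check holds because the sign-bit extraction can be expressed without a mask. A plausible folded form, assuming the usual instcombine handling (the function name is illustrative):

define i8 @sketch_test21(i8 %A) {
  ; ashr by 7 broadcasts the sign bit to 0 or -1; masking with 1 keeps
  ; just that bit, which is exactly a logical shift right by 7.
  %D = lshr i8 %A, 7
  ret i8 %D
}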
diff --git a/llvm/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll b/llvm/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
new file mode 100644
index 00000000000..6a95c82374d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test case tests the InstructionCombining optimization that
+; reduces things like:
+; %Y = sext i8 %X to i32
+; %C = icmp slt i32 %Y, 1024
+; to
+; %C = i1 true
+; It includes test cases for different constant values, signedness of the
+; cast operands, and types of setCC operators. In all cases, the cast should
+; be eliminated. In many cases the setCC is also eliminated based on the
+; constant value and the range of the casted value.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @lt_signed_to_large_unsigned(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_large_unsigned(
+; CHECK-NEXT: [[C1:%.*]] = icmp sgt i8 %SB, -1
+; CHECK-NEXT: ret i1 [[C1]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ult i32 %Y, 1024
+ ret i1 %C
+}
+
+; PR28011 - https://llvm.org/bugs/show_bug.cgi?id=28011
+; The above transform only applies to scalar integers; it shouldn't be attempted for constant expressions or vectors.
+
+@a = common global i32** null
+@b = common global [1 x i32] zeroinitializer
+
+define i1 @PR28011(i16 %a) {
+; CHECK-LABEL: @PR28011(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CONV]], or (i32 zext (i1 icmp ne (i32*** bitcast ([1 x i32]* @b to i32***), i32*** @a) to i32), i32 1)
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = sext i16 %a to i32
+ %cmp = icmp ne i32 %conv, or (i32 zext (i1 icmp ne (i32*** bitcast ([1 x i32]* @b to i32***), i32*** @a) to i32), i32 1)
+ ret i1 %cmp
+}
+
+define <2 x i1> @lt_signed_to_large_unsigned_vec(<2 x i8> %SB) {
+; CHECK-LABEL: @lt_signed_to_large_unsigned_vec(
+; CHECK-NEXT: [[Y:%.*]] = sext <2 x i8> %SB to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[Y]], <i32 1024, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %Y = sext <2 x i8> %SB to <2 x i32>
+ %C = icmp ult <2 x i32> %Y, <i32 1024, i32 2>
+ ret <2 x i1> %C
+}
+
+define i1 @lt_signed_to_large_signed(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_large_signed(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @lt_signed_to_large_negative(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_large_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, -1024
+ ret i1 %C
+}
+
+define i1 @lt_signed_to_small_unsigned(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ult i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @lt_signed_to_small_signed(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, 17
+ ret i1 %C
+}
+define i1 @lt_signed_to_small_negative(i8 %SB) {
+; CHECK-LABEL: @lt_signed_to_small_negative(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, -17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, -17
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_large_unsigned(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_large_unsigned(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ult i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_large_signed(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_large_signed(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_large_negative(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_large_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, -1024
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_small_unsigned(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ult i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_small_signed(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @lt_unsigned_to_small_negative(i8 %SB) {
+; CHECK-LABEL: @lt_unsigned_to_small_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, -17
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_large_unsigned(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_large_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_large_signed(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_large_signed(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_large_negative(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_large_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -1024
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_small_unsigned(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_small_signed(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @gt_signed_to_small_negative(i8 %SB) {
+; CHECK-LABEL: @gt_signed_to_small_negative(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 %SB, -17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -17
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_large_unsigned(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_large_unsigned(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_large_signed(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_large_signed(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 1024
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_large_negative(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_large_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -1024
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_small_unsigned(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_small_signed(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 17
+ ret i1 %C
+}
+
+define i1 @gt_unsigned_to_small_negative(i8 %SB) {
+; CHECK-LABEL: @gt_unsigned_to_small_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -17
+ ret i1 %C
+}
+
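The checks above all follow from simple interval reasoning: sext i8 yields a value in [-128, 127] and zext i8 a value in [0, 255], so a comparison against a constant outside that interval folds to a constant, while one inside it shrinks to an i8 comparison. One worked instance of that reasoning (the function name is illustrative):

define i1 @sketch_always_true(i8 %X) {
  ; %Y can be at most 127, so a signed compare against 1024 always holds.
  %Y = sext i8 %X to i32
  %C = icmp slt i32 %Y, 1024
  ret i1 %C
}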
diff --git a/llvm/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll b/llvm/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
new file mode 100644
index 00000000000..008afa8a787
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine
+
+define i32 @test(i32 %X) {
+ %Y = srem i32 %X, undef ; <i32> [#uses=1]
+ ret i32 %Y
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll b/llvm/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
new file mode 100644
index 00000000000..02bc043da0d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: not grep "ret i1 false"
+
+define i1 @test(i64 %tmp.169) {
+ %tmp.1710 = lshr i64 %tmp.169, 1 ; <i64> [#uses=1]
+ %tmp.1912 = icmp ugt i64 %tmp.1710, 0 ; <i1> [#uses=1]
+ ret i1 %tmp.1912
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll b/llvm/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
new file mode 100644
index 00000000000..1ec118006de
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define i32 @test(i1 %C, i32 %tmp.15) {
+ %tmp.16 = select i1 %C, i32 8, i32 1 ; <i32> [#uses=1]
+ %tmp.18 = udiv i32 %tmp.15, %tmp.16 ; <i32> [#uses=1]
+ ret i32 %tmp.18
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll b/llvm/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
new file mode 100644
index 00000000000..9846ee72dd4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define i32 @_Z13func_31585107li(i32 %l_39521025, i32 %l_59244666) {
+ %shortcirc_val = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
+ %tmp.8 = udiv i32 0, %shortcirc_val ; <i32> [#uses=1]
+ %tmp.9 = icmp eq i32 %tmp.8, 0 ; <i1> [#uses=1]
+ %retval = select i1 %tmp.9, i32 %l_59244666, i32 -1621308501 ; <i32> [#uses=1]
+ ret i32 %retval
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll b/llvm/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
new file mode 100644
index 00000000000..e2d0618a41e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR577
+
+define i1 @test() {
+ %tmp.3 = shl i32 0, 41 ; <i32> [#uses=1]
+ %tmp.4 = icmp ne i32 %tmp.3, 0 ; <i1> [#uses=1]
+ ret i1 %tmp.4
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll b/llvm/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
new file mode 100644
index 00000000000..f0e60aca59f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR585
+
+define i1 @test() {
+ %tmp.26 = sdiv i32 0, -2147483648 ; <i32> [#uses=1]
+ %tmp.27 = icmp eq i32 %tmp.26, 0 ; <i1> [#uses=1]
+ ret i1 %tmp.27
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll b/llvm/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
new file mode 100644
index 00000000000..caee951b704
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -instcombine -disable-output
+
+; This example caused instcombine to spin into an infinite loop.
+
+define void @test(i32* %P) {
+ ret void
+
+Dead: ; preds = %Dead
+ %X = phi i32 [ %Y, %Dead ] ; <i32> [#uses=1]
+ %Y = sdiv i32 %X, 10 ; <i32> [#uses=2]
+ store i32 %Y, i32* %P
+ br label %Dead
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll b/llvm/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
new file mode 100644
index 00000000000..10541ef7032
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: not grep undef
+
+define i32 @test(i8 %A) {
+ %B = sext i8 %A to i32 ; <i32> [#uses=1]
+ %C = ashr i32 %B, 8 ; <i32> [#uses=1]
+ ret i32 %C
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/2006-02-28-Crash.ll b/llvm/test/Transforms/InstCombine/2006-02-28-Crash.ll
new file mode 100644
index 00000000000..9bea14c027e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-02-28-Crash.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define i32 @test() {
+ %tmp203 = icmp eq i32 1, 2 ; <i1> [#uses=1]
+ %tmp203.upgrd.1 = zext i1 %tmp203 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp203.upgrd.1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll b/llvm/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
new file mode 100644
index 00000000000..aa7d58786b4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define float @test(<4 x float> %V) {
+ %V2 = insertelement <4 x float> %V, float 1.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
+ %R = extractelement <4 x float> %V2, i32 2 ; <float> [#uses=1]
+ ret float %R
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll b/llvm/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
new file mode 100644
index 00000000000..baaafefa81d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This cannot be turned into a sign extending cast!
+
+define i64 @test(i64 %X) {
+ %Y = shl i64 %X, 16 ; <i64> [#uses=1]
+; CHECK: %Y = shl i64 %X, 16
+ %Z = ashr i64 %Y, 16 ; <i64> [#uses=1]
+; CHECK: %Z = ashr exact i64 %Y, 16
+ ret i64 %Z
+; CHECK: ret i64 %Z
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll b/llvm/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
new file mode 100644
index 00000000000..e22395fb877
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -disable-output
+; END.
+
+define void @test() {
+bb38.i:
+ %varspec.0.i1014 = bitcast i64 123814269237067777 to i64 ; <i64> [#uses=1]
+ %locspec.0.i1015 = bitcast i32 1 to i32 ; <i32> [#uses=2]
+ %tmp51391.i1018 = lshr i64 %varspec.0.i1014, 16 ; <i64> [#uses=1]
+ %tmp51392.i1019 = trunc i64 %tmp51391.i1018 to i32 ; <i32> [#uses=2]
+ %tmp51392.mask.i1020 = lshr i32 %tmp51392.i1019, 29 ; <i32> [#uses=1]
+ %tmp7.i1021 = and i32 %tmp51392.mask.i1020, 1 ; <i32> [#uses=2]
+ %tmp18.i1026 = lshr i32 %tmp51392.i1019, 31 ; <i32> [#uses=2]
+ %tmp18.i1027 = trunc i32 %tmp18.i1026 to i8 ; <i8> [#uses=1]
+ br i1 false, label %cond_false1148.i1653, label %bb377.i1259
+
+bb377.i1259: ; preds = %bb38.i
+ br i1 false, label %cond_true541.i1317, label %cond_false1148.i1653
+
+cond_true541.i1317: ; preds = %bb377.i1259
+ %tmp545.i1318 = lshr i32 %locspec.0.i1015, 10 ; <i32> [#uses=1]
+ %tmp550.i1319 = lshr i32 %locspec.0.i1015, 4 ; <i32> [#uses=1]
+ %tmp550551.i1320 = and i32 %tmp550.i1319, 63 ; <i32> [#uses=1]
+ %tmp553.i1321 = icmp ult i32 %tmp550551.i1320, 4 ; <i1> [#uses=1]
+ %tmp558.i1322 = icmp eq i32 %tmp7.i1021, 0 ; <i1> [#uses=1]
+ %bothcond.i1326 = or i1 %tmp553.i1321, false ; <i1> [#uses=1]
+ %bothcond1.i1327 = or i1 %bothcond.i1326, false ; <i1> [#uses=1]
+ %bothcond2.not.i1328 = or i1 %bothcond1.i1327, false ; <i1> [#uses=1]
+ %bothcond3.i1329 = or i1 %bothcond2.not.i1328, %tmp558.i1322 ; <i1> [#uses=0]
+ br i1 false, label %cond_true583.i1333, label %cond_next592.i1337
+
+cond_true583.i1333: ; preds = %cond_true541.i1317
+ br i1 false, label %cond_true586.i1335, label %cond_next592.i1337
+
+cond_true586.i1335: ; preds = %cond_true583.i1333
+ br label %cond_true.i
+
+cond_next592.i1337: ; preds = %cond_true583.i1333, %cond_true541.i1317
+ %mask_z.0.i1339 = phi i32 [ %tmp18.i1026, %cond_true541.i1317 ], [ 0, %cond_true583.i1333 ] ; <i32> [#uses=0]
+ %tmp594.i1340 = and i32 %tmp545.i1318, 15 ; <i32> [#uses=0]
+ br label %cond_true.i
+
+cond_false1148.i1653: ; preds = %bb377.i1259, %bb38.i
+ %tmp1150.i1654 = icmp eq i32 %tmp7.i1021, 0 ; <i1> [#uses=1]
+ %tmp1160.i1656 = icmp eq i8 %tmp18.i1027, 0 ; <i1> [#uses=1]
+ %bothcond8.i1658 = or i1 %tmp1150.i1654, %tmp1160.i1656 ; <i1> [#uses=1]
+ %bothcond9.i1659 = or i1 %bothcond8.i1658, false ; <i1> [#uses=0]
+ br label %cond_true.i
+
+cond_true.i: ; preds = %cond_false1148.i1653, %cond_next592.i1337, %cond_true586.i1335
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll b/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
new file mode 100644
index 00000000000..2eb28a3f6bb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -instcombine -S | grep and
+; PR913
+
+define i32 @test(i32* %tmp1) {
+ %tmp.i = load i32, i32* %tmp1 ; <i32> [#uses=1]
+ %tmp = bitcast i32 %tmp.i to i32 ; <i32> [#uses=1]
+ %tmp2.ui = lshr i32 %tmp, 5 ; <i32> [#uses=1]
+ %tmp2 = bitcast i32 %tmp2.ui to i32 ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp2, 1 ; <i32> [#uses=1]
+ %tmp3.upgrd.1 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3.upgrd.1 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll b/llvm/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
new file mode 100644
index 00000000000..889bbcfa3ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
@@ -0,0 +1,10 @@
+; The optimizer should be able to remove the cast operation here.
+; RUN: opt < %s -instcombine -S | \
+; RUN: not grep sext.*i32
+
+define i1 @eq_signed_to_small_unsigned(i8 %SB) {
+ %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
+ %C = icmp eq i32 %Y, 17 ; <i1> [#uses=1]
+ ret i1 %C
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-10-20-mask.ll b/llvm/test/Transforms/InstCombine/2006-10-20-mask.ll
new file mode 100644
index 00000000000..e9797ae50c9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-10-20-mask.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: and
+
+define i64 @foo(i64 %tmp, i64 %tmp2) {
+ %tmp.upgrd.1 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
+ %tmp2.upgrd.2 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp.upgrd.1, %tmp2.upgrd.2 ; <i32> [#uses=1]
+ %tmp4 = zext i32 %tmp3 to i64 ; <i64> [#uses=1]
+ ret i64 %tmp4
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
new file mode 100644
index 00000000000..b5a8686049c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fmul(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+ %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fmul_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
+define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+ %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadd(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+ %Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fadd_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; TODO: This doesn't require 'nsz'. It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
+define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+ %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+ ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+ %X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+ %Z = fadd <4 x float> %X, %Y
+ ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'fast'
+define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+ %Z = fadd fast <4 x float> %X, %Y
+ ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
+define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+ %X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+ %Z = fadd reassoc nsz <4 x float> %X, %Y
+ ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+ %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+ %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+ %Z = fadd reassoc <4 x float> %X, %Y
+ ret <4 x float> %Z
+}
diff --git a/llvm/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll b/llvm/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
new file mode 100644
index 00000000000..7799423f04f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep lshr
+; Verify this is not turned into -1.
+
+define i32 @test(i8 %amt) {
+ %shift.upgrd.1 = zext i8 %amt to i32 ; <i32> [#uses=1]
+ %B = lshr i32 -1, %shift.upgrd.1 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll b/llvm/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
new file mode 100644
index 00000000000..b4285ab8209
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x float> @test(<4 x float> %tmp26, <4 x float> %tmp53) {
+ ; (X+Y)-Y != X for fp vectors.
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP64:%.*]] = fadd <4 x float> %tmp26, %tmp53
+; CHECK-NEXT: [[TMP75:%.*]] = fsub <4 x float> [[TMP64]], %tmp53
+; CHECK-NEXT: ret <4 x float> [[TMP75]]
+;
+ %tmp64 = fadd <4 x float> %tmp26, %tmp53
+ %tmp75 = fsub <4 x float> %tmp64, %tmp53
+ ret <4 x float> %tmp75
+}
diff --git a/llvm/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll b/llvm/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
new file mode 100644
index 00000000000..74483c1d9c7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | grep zext
+
+; Never merge these two conversions, even though it's possible: this is
+; significantly more expensive than the two separate conversions on some
+; targets, and it causes libgcc's __fixunsdfdi to be compiled into a
+; recursive function.
+define i64 @test(double %D) {
+ %A = fptoui double %D to i32 ; <i32> [#uses=1]
+ %B = zext i32 %A to i64 ; <i64> [#uses=1]
+ ret i64 %B
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll b/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
new file mode 100644
index 00000000000..60113fb5620
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep "icmp sgt"
+; END.
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+ %struct.point = type { i32, i32 }
+
+define i32 @visible(i32 %direction, i64 %p1.0, i64 %p2.0, i64 %p3.0) {
+entry:
+ %p1_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %p2_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %p3_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp = bitcast %struct.point* %p1_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp.upgrd.1 = getelementptr { i64 }, { i64 }* %tmp, i64 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p1.0, i64* %tmp.upgrd.1
+ %tmp1 = bitcast %struct.point* %p2_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp2 = getelementptr { i64 }, { i64 }* %tmp1, i64 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p2.0, i64* %tmp2
+ %tmp3 = bitcast %struct.point* %p3_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp4 = getelementptr { i64 }, { i64 }* %tmp3, i64 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p3.0, i64* %tmp4
+ %tmp.upgrd.2 = icmp eq i32 %direction, 0 ; <i1> [#uses=1]
+ %tmp5 = bitcast %struct.point* %p1_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp6 = getelementptr { i64 }, { i64 }* %tmp5, i64 0, i32 0 ; <i64*> [#uses=1]
+ %tmp.upgrd.3 = load i64, i64* %tmp6 ; <i64> [#uses=1]
+ %tmp7 = bitcast %struct.point* %p2_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i64 0, i32 0 ; <i64*> [#uses=1]
+ %tmp9 = load i64, i64* %tmp8 ; <i64> [#uses=1]
+ %tmp10 = bitcast %struct.point* %p3_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp11 = getelementptr { i64 }, { i64 }* %tmp10, i64 0, i32 0 ; <i64*> [#uses=1]
+ %tmp12 = load i64, i64* %tmp11 ; <i64> [#uses=1]
+ %tmp13 = call i32 @determinant( i64 %tmp.upgrd.3, i64 %tmp9, i64 %tmp12 ) ; <i32> [#uses=2]
+ br i1 %tmp.upgrd.2, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp14 = icmp slt i32 %tmp13, 0 ; <i1> [#uses=1]
+ %tmp14.upgrd.4 = zext i1 %tmp14 to i32 ; <i32> [#uses=1]
+ br label %return
+
+cond_false: ; preds = %entry
+ %tmp26 = icmp sgt i32 %tmp13, 0 ; <i1> [#uses=1]
+ %tmp26.upgrd.5 = zext i1 %tmp26 to i32 ; <i32> [#uses=1]
+ br label %return
+
+return: ; preds = %cond_false, %cond_true
+ %retval.0 = phi i32 [ %tmp14.upgrd.4, %cond_true ], [ %tmp26.upgrd.5, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %retval.0
+}
+
+declare i32 @determinant(i64, i64, i64)
diff --git a/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
new file mode 100644
index 00000000000..f54416d1425
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
+; END.
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+ %struct.point = type { i32, i32 }
+
+define i32 @visible(i32 %direction, i64 %p1.0, i64 %p2.0, i64 %p3.0) {
+entry:
+ %p1_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %p2_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %p3_addr = alloca %struct.point ; <%struct.point*> [#uses=2]
+ %tmp = bitcast %struct.point* %p1_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp.upgrd.1 = getelementptr { i64 }, { i64 }* %tmp, i32 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p1.0, i64* %tmp.upgrd.1
+ %tmp1 = bitcast %struct.point* %p2_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp2 = getelementptr { i64 }, { i64 }* %tmp1, i32 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p2.0, i64* %tmp2
+ %tmp3 = bitcast %struct.point* %p3_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp4 = getelementptr { i64 }, { i64 }* %tmp3, i32 0, i32 0 ; <i64*> [#uses=1]
+ store i64 %p3.0, i64* %tmp4
+ %tmp.upgrd.2 = icmp eq i32 %direction, 0 ; <i1> [#uses=1]
+ %tmp5 = bitcast %struct.point* %p1_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp6 = getelementptr { i64 }, { i64 }* %tmp5, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp.upgrd.3 = load i64, i64* %tmp6 ; <i64> [#uses=1]
+ %tmp7 = bitcast %struct.point* %p2_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp9 = load i64, i64* %tmp8 ; <i64> [#uses=1]
+ %tmp10 = bitcast %struct.point* %p3_addr to { i64 }* ; <{ i64 }*> [#uses=1]
+ %tmp11 = getelementptr { i64 }, { i64 }* %tmp10, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp12 = load i64, i64* %tmp11 ; <i64> [#uses=1]
+ %tmp13 = call i32 @determinant( i64 %tmp.upgrd.3, i64 %tmp9, i64 %tmp12 ) ; <i32> [#uses=2]
+ %tmp14 = icmp slt i32 %tmp13, 0 ; <i1> [#uses=1]
+ %tmp26 = icmp sgt i32 %tmp13, 0 ; <i1> [#uses=1]
+ %retval.0.in = select i1 %tmp.upgrd.2, i1 %tmp14, i1 %tmp26 ; <i1> [#uses=1]
+ %retval.0 = zext i1 %retval.0.in to i32 ; <i32> [#uses=1]
+ ret i32 %retval.0
+}
+
+declare i32 @determinant(i64, i64, i64)
+
diff --git a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
new file mode 100644
index 00000000000..784b3e4fe68
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep icmp | count 1
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep "icmp ugt" | count 1
+; END.
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+@r = external global [17 x i32] ; <[17 x i32]*> [#uses=1]
+
+define i1 @print_pgm_cond_true(i32 %tmp12.reload, i32* %tmp16.out) {
+newFuncRoot:
+ br label %cond_true
+
+bb27.exitStub: ; preds = %cond_true
+ store i32 %tmp16, i32* %tmp16.out
+ ret i1 true
+
+cond_next23.exitStub: ; preds = %cond_true
+ store i32 %tmp16, i32* %tmp16.out
+ ret i1 false
+
+cond_true: ; preds = %newFuncRoot
+ %tmp15 = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 %tmp12.reload ; <i32*> [#uses=1]
+ %tmp16 = load i32, i32* %tmp15 ; <i32> [#uses=4]
+ %tmp18 = icmp slt i32 %tmp16, -31 ; <i1> [#uses=1]
+ %tmp21 = icmp sgt i32 %tmp16, 31 ; <i1> [#uses=1]
+ %bothcond = or i1 %tmp18, %tmp21 ; <i1> [#uses=1]
+ br i1 %bothcond, label %bb27.exitStub, label %cond_next23.exitStub
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll b/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
new file mode 100644
index 00000000000..9251e9b455d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
@@ -0,0 +1,30 @@
+; For PR1065. This causes an assertion in instcombine if a select with two cmp
+; operands is encountered.
+; RUN: opt < %s -instcombine -disable-output
+; END.
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+ %struct.internal_state = type { i32 }
+ %struct.mng_data = type { i32, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i32, i32, i32, i8, i32, i32, i32, i32, i16, i16, i16, i8, i8, double, double, double, i8, i8, i8, i8, i32, i32, i32, i32, i32, i8, i32, i32, i8*, i8* (i32)*, void (i8*, i32)*, void (i8*, i8*, i32)*, i8 (%struct.mng_data*)*, i8 (%struct.mng_data*)*, i8 (%struct.mng_data*, i8*, i32, i32*)*, i8 (%struct.mng_data*, i8*, i32, i32*)*, i8 (%struct.mng_data*, i32, i8, i32, i32, i32, i32, i8*)*, i8 (%struct.mng_data*, i32, i32, i8*)*, i8 (%struct.mng_data*, i32, i32)*, i8 (%struct.mng_data*, i8, i8*, i8*, i8*, i8*)*, i8 (%struct.mng_data*)*, i8 (%struct.mng_data*, i8*)*, i8 (%struct.mng_data*, i8*)*, i8 (%struct.mng_data*, i32, i32)*, i8 (%struct.mng_data*, i32, i32, i8*)*, i8 (%struct.mng_data*, i8, i8, i32, i32)*, i8* (%struct.mng_data*, i32)*, i8* (%struct.mng_data*, i32)*, i8* (%struct.mng_data*, i32)*, i8 (%struct.mng_data*, i32, i32, i32, i32)*, i32 (%struct.mng_data*)*, i8 (%struct.mng_data*, i32)*, i8 (%struct.mng_data*, i32)*, i8 (%struct.mng_data*, i32, i32, i32, i32, i32, i32, i32, i32)*, i8 (%struct.mng_data*, i8)*, i8 (%struct.mng_data*, i32, i8*)*, i8 (%struct.mng_data*, i32, i8, i8*)*, i8, i32, i32, i8*, i8*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i8, i8, i8, i8, i8, i32, i8, i8, i8, i32, i8*, i32, i8*, i32, i8, i8, i8, i32, i8*, i8*, i32, i32, i8*, i8*, %struct.mng_pushdata*, %struct.mng_pushdata*, %struct.mng_pushdata*, %struct.mng_pushdata*, i8, i8, i32, i32, i8*, i8, i8, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i8*, i32, i32, i32, i8, i8, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.mng_savedata*, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [256 x i8], double, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, i16, i8, i8, i8, i8, i8, i32, i32, i8, i32, i32, i32, i32, i16, i16, i16, i8, i16, i8, i32, i32, i32, i32, i8, i32, i32, i8, i32, i32, i32, i32, i8, i32, i32, i8, i32, i32, i32, i32, i32, i8, i32, i8, i16, i16, i16, i16, i32, [256 x %struct.mng_palette8e], i32, [256 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i32, i8*, i16, i16, i16, i8*, i8, i8, i32, i32, i32, i32, i8, void ()*, void ()*, void ()*, void ()*, void ()*, void ()*, i8*, i8, i8, i8, i32, i8*, i8*, i16, i16, i16, i16, i32, i32, i8*, %struct.z_stream, i32, i32, i32, i32, i32, i32, i8, i8, [256 x i32], i8 }
+ %struct.mng_palette8e = type { i8, i8, i8 }
+ %struct.mng_pushdata = type { i8*, i8*, i32, i8, i8*, i32 }
+ %struct.mng_savedata = type { i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i8, i16, i8, i8, i32, i32, i8, i32, i32, i32, i32, i32, [256 x %struct.mng_palette8e], i32, [256 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i32, i8*, i16, i16, i16 }
+ %struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
+
+define void @mng_write_basi() {
+entry:
+ %tmp = load i8, i8* null ; <i8> [#uses=1]
+ %tmp.upgrd.1 = icmp ugt i8 %tmp, 8 ; <i1> [#uses=1]
+ %tmp.upgrd.2 = load i16, i16* null ; <i16> [#uses=2]
+ %tmp3 = icmp eq i16 %tmp.upgrd.2, 255 ; <i1> [#uses=1]
+ %tmp7 = icmp eq i16 %tmp.upgrd.2, -1 ; <i1> [#uses=1]
+ %bOpaque.0.in = select i1 %tmp.upgrd.1, i1 %tmp7, i1 %tmp3 ; <i1> [#uses=1]
+ br i1 %bOpaque.0.in, label %cond_next90, label %bb95
+
+cond_next90: ; preds = %entry
+ ret void
+
+bb95: ; preds = %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll b/llvm/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
new file mode 100644
index 00000000000..635a09ca93c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | grep "icmp ugt"
+; PR1107
+; PR1940
+
+define i1 @test(i8 %A, i8 %B) {
+ %a = zext i8 %A to i32
+ %b = zext i8 %B to i32
+ %c = icmp sgt i32 %a, %b
+ ret i1 %c
+}
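The RUN line greps for "icmp ugt" because both operands are zero-extended and therefore non-negative, so the signed compare is equivalent to an unsigned compare of the original i8 values. One equivalent form, sketched under that assumption (the function name is illustrative):

define i1 @sketch_narrow_ugt(i8 %A, i8 %B) {
  ; zext'ed values lie in [0, 255], so sgt on the widened values matches
  ; ugt on the narrow ones.
  %c = icmp ugt i8 %A, %B
  ret i1 %c
}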
diff --git a/llvm/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll b/llvm/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
new file mode 100644
index 00000000000..fed2255c0e2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define <4 x i32> @test(<4 x i32> %A) {
+ %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %C = and <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %C
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
new file mode 100644
index 00000000000..113ada3df9d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -mem2reg -S | grep "%A = alloca"
+; RUN: opt < %s -instcombine -mem2reg -S | \
+; RUN: not grep "%B = alloca"
+; END.
+
+; Ensure that instcombine doesn't sink the loads in entry/cond_true into
+; cond_next; sinking them would prevent mem2reg from promoting the B alloca.
+
+define i32 @test2(i32 %C) {
+entry:
+ %A = alloca i32
+ %B = alloca i32
+ %tmp = call i32 (...) @bar( i32* %A ) ; <i32> [#uses=0]
+ %T = load i32, i32* %A ; <i32> [#uses=1]
+ %tmp2 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
+ br i1 %tmp2, label %cond_next, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 123, i32* %B
+ call i32 @test2( i32 123 ) ; <i32>:0 [#uses=0]
+ %T1 = load i32, i32* %B ; <i32> [#uses=1]
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %entry
+ %tmp1.0 = phi i32 [ %T1, %cond_true ], [ %T, %entry ] ; <i32> [#uses=1]
+ %tmp7 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp8 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp9 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp11 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp12 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp15 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp18 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ %tmp20 = call i32 (...) @baq( ) ; <i32> [#uses=0]
+ ret i32 %tmp1.0
+}
+
+declare i32 @bar(...)
+
+declare i32 @baq(...)
diff --git a/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll b/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
new file mode 100644
index 00000000000..ddc1e03ddd2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
@@ -0,0 +1,22 @@
+;RUN: opt < %s -instcombine -S | grep zext
+
+; Make sure the uint cast isn't removed. Instcombine in llvm 1.9 was dropping
+; the uint cast, which caused a sign extend instead. This only affected code
+; with pointers in the high half of memory, so it wasn't noticed much;
+; compiling a kernel, however, would expose it.
+
+target datalayout = "e-p:32:32"
+@str = internal constant [6 x i8] c"%llx\0A\00" ; <[6 x i8]*> [#uses=1]
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %x, i8** %a) {
+entry:
+ %tmp = getelementptr [6 x i8], [6 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
+ %tmp1 = load i8*, i8** %a ; <i8*> [#uses=1]
+ %tmp2 = ptrtoint i8* %tmp1 to i32 ; <i32> [#uses=1]
+ %tmp3 = zext i32 %tmp2 to i64 ; <i64> [#uses=1]
+ %tmp.upgrd.1 = call i32 (i8*, ...) @printf( i8* %tmp, i64 %tmp3 ) ; <i32> [#uses=0]
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll b/llvm/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
new file mode 100644
index 00000000000..f31c280661d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | grep ret
+; PR1217
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+ %struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }
+
+
+define void @ggenorien() {
+entry:
+ %tmp68 = icmp eq %struct.termbox* null, null ; <i1> [#uses=1]
+ br i1 %tmp68, label %cond_next448, label %bb80
+
+bb80: ; preds = %entry
+ ret void
+
+cond_next448: ; preds = %entry
+ br i1 false, label %bb756, label %bb595
+
+bb595: ; preds = %cond_next448
+ br label %bb609
+
+bb609: ; preds = %bb756, %bb595
+ %termnum.6240.0 = phi i32 [ 2, %bb595 ], [ %termnum.6, %bb756 ] ; <i32> [#uses=1]
+ %tmp755 = add i32 %termnum.6240.0, 1 ; <i32> [#uses=1]
+ br label %bb756
+
+bb756: ; preds = %bb609, %cond_next448
+ %termnum.6 = phi i32 [ %tmp755, %bb609 ], [ 2, %cond_next448 ] ; <i32> [#uses=1]
+ br label %bb609
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
new file mode 100644
index 00000000000..826d68aefc1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep "icmp sle"
+; PR1244
+
+define i1 @test(i32 %c.3.i, i32 %d.292.2.i) {
+ %tmp266.i = icmp slt i32 %c.3.i, %d.292.2.i
+ %tmp276.i = icmp eq i32 %c.3.i, %d.292.2.i
+ %sel_tmp80 = or i1 %tmp266.i, %tmp276.i
+ ret i1 %sel_tmp80
+}
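+; Rough sketch of the expected fold (value name may differ): the slt and eq
+; compares of the same operands merge into a single signed less-or-equal,
+; matching the "icmp sle" the RUN line greps for:
+;   %sel_tmp80 = icmp sle i32 %c.3.i, %d.292.2.i
+;   ret i1 %sel_tmp80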
diff --git a/llvm/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll b/llvm/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
new file mode 100644
index 00000000000..589bd805d6d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | grep zext
+; PR1261.
+
+define i16 @test(i31 %zzz) {
+ %A = sext i31 %zzz to i32
+ %B = add i32 %A, 16384
+ %C = lshr i32 %B, 15
+ %D = trunc i32 %C to i16
+ ret i16 %D
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
new file mode 100644
index 00000000000..ffcfe2683cf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; For PR1248
+
+define i1 @test(i32 %tmp6) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP6_OFF:%.*]] = add i32 %tmp6, 83
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP6_OFF]], 11
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp7 = sdiv i32 %tmp6, 12
+ icmp ne i32 %tmp7, -6
+ ret i1 %1
+}
+
+define <2 x i1> @test_vec(<2 x i32> %tmp6) {
+; CHECK-LABEL: @test_vec(
+; CHECK-NEXT: [[TMP6_OFF:%.*]] = add <2 x i32> %tmp6, <i32 83, i32 83>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[TMP6_OFF]], <i32 11, i32 11>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp7 = sdiv <2 x i32> %tmp6, <i32 12, i32 12>
+ icmp ne <2 x i32> %tmp7, <i32 -6, i32 -6>
+ ret <2 x i1> %1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
new file mode 100644
index 00000000000..9fe29312793
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; PR1271
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct..0anon = type { i32, i32 }
+%struct..1anon = type { double }
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[U:%.*]] = alloca %struct..1anon, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds %struct..1anon, %struct..1anon* [[U]], i64 0, i32 0
+; CHECK-NEXT: store double 0x7FF0000000000000, double* [[TMP1]], align 8
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast %struct..1anon* [[U]] to %struct..0anon*
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds %struct..0anon, %struct..0anon* [[TMP34]], i64 0, i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP89:%.*]] = and i32 [[TMP6]], 2146435072
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[TMP89]], 2146435072
+; CHECK-NEXT: br i1 [[TMP0]], label %cond_false, label %cond_true
+; CHECK: cond_true:
+; CHECK-NEXT: ret i32 0
+; CHECK: cond_false:
+; CHECK-NEXT: ret i32 1
+;
+entry:
+ %u = alloca %struct..1anon, align 8
+ %tmp1 = getelementptr %struct..1anon, %struct..1anon* %u, i32 0, i32 0
+ store double 0x7FF0000000000000, double* %tmp1
+ %tmp3 = getelementptr %struct..1anon, %struct..1anon* %u, i32 0, i32 0
+ %tmp34 = bitcast double* %tmp3 to %struct..0anon*
+ %tmp5 = getelementptr %struct..0anon, %struct..0anon* %tmp34, i32 0, i32 1
+ %tmp6 = load i32, i32* %tmp5
+ %tmp7 = shl i32 %tmp6, 1
+ %tmp8 = lshr i32 %tmp7, 21
+ %tmp89 = trunc i32 %tmp8 to i16
+ icmp ne i16 %tmp89, 2047
+ zext i1 %0 to i8
+ icmp ne i8 %1, 0
+ br i1 %2, label %cond_true, label %cond_false
+
+cond_true:
+ ret i32 0
+
+cond_false:
+ ret i32 1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll b/llvm/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
new file mode 100644
index 00000000000..0d4aac25c28
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
@@ -0,0 +1,9 @@
+; PR1271
+; RUN: opt < %s -instcombine -S | grep and
+define i1 @test(i32 %tmp13) {
+entry:
+ %tmp14 = shl i32 %tmp13, 12 ; <i32> [#uses=1]
+ %tmp15 = lshr i32 %tmp14, 12 ; <i32> [#uses=1]
+ %res = icmp ne i32 %tmp15, 0 ; <i1>:3 [#uses=1]
+ ret i1 %res
+}
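+; Rough sketch of the expected fold (constant assumes i32 shifts by 12):
+; shifting left then logically right by 12 just clears the top 12 bits, so
+; the pair should become a mask of the low 20 bits, e.g.:
+;   %tmp15 = and i32 %tmp13, 1048575
+;   %res = icmp ne i32 %tmp15, 0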
diff --git a/llvm/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll b/llvm/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
new file mode 100644
index 00000000000..c4070a1b1db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
@@ -0,0 +1,35 @@
+; PR1271
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep "ashr exact i32 %.mp137, 2"
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+
+
+define i1 @test(i32* %tmp141, i32* %tmp145,
+ i32 %b8, i32 %iftmp.430.0, i32* %tmp134.out, i32* %tmp137.out)
+{
+newFuncRoot:
+ %tmp133 = and i32 %b8, 1 ; <i32> [#uses=1]
+ %tmp134 = shl i32 %tmp133, 3 ; <i32> [#uses=3]
+ %tmp136 = ashr i32 %b8, 1 ; <i32> [#uses=1]
+ %tmp137 = shl i32 %tmp136, 3 ; <i32> [#uses=3]
+ %tmp139 = ashr i32 %tmp134, 2 ; <i32> [#uses=1]
+ store i32 %tmp139, i32* %tmp141
+ %tmp143 = ashr i32 %tmp137, 2 ; <i32> [#uses=1]
+ store i32 %tmp143, i32* %tmp145
+ icmp eq i32 %iftmp.430.0, 0 ; <i1>:0 [#uses=1]
+ zext i1 %0 to i8 ; <i8>:1 [#uses=1]
+ icmp ne i8 %1, 0 ; <i1>:2 [#uses=1]
+ br i1 %2, label %cond_true147.exitStub, label %cond_false252.exitStub
+
+cond_true147.exitStub: ; preds = %newFuncRoot
+ store i32 %tmp134, i32* %tmp134.out
+ store i32 %tmp137, i32* %tmp137.out
+ ret i1 true
+
+cond_false252.exitStub: ; preds = %newFuncRoot
+ store i32 %tmp134, i32* %tmp134.out
+ store i32 %tmp137, i32* %tmp137.out
+ ret i1 false
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll b/llvm/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
new file mode 100644
index 00000000000..22eb2c23c34
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR1304
+
+define i64 @bork(<1 x i64> %vec) {
+ %tmp = extractelement <1 x i64> %vec, i32 0
+ ret i64 %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
new file mode 100644
index 00000000000..4af5dfeef5d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -disable-output
+define i1 @test(i32 %tmp9) {
+ %tmp20 = icmp ugt i32 %tmp9, 255 ; <i1> [#uses=1]
+ %tmp11.not = icmp sgt i32 %tmp9, 255 ; <i1> [#uses=1]
+ %bothcond = or i1 %tmp20, %tmp11.not ; <i1> [#uses=1]
+ ret i1 %bothcond
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-05-14-Crash.ll b/llvm/test/Transforms/InstCombine/2007-05-14-Crash.ll
new file mode 100644
index 00000000000..e744489f436
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-05-14-Crash.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -disable-output
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "powerpc-unknown-linux-gnu"
+
+%struct.abc = type { i32, [32 x i8] }
+%struct.def = type { i8**, %struct.abc }
+ %struct.anon = type <{ }>
+
+define i8* @foo(%struct.anon* %deviceRef, %struct.abc* %pCap) {
+entry:
+ %tmp1 = bitcast %struct.anon* %deviceRef to %struct.def*
+ %tmp3 = getelementptr %struct.def, %struct.def* %tmp1, i32 0, i32 1
+ %tmp35 = bitcast %struct.abc* %tmp3 to i8*
+ ret i8* %tmp35
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/llvm/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
new file mode 100644
index 00000000000..eb0c364bfa2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | grep "call.*sret"
+; Make sure instcombine doesn't drop the sret attribute.
+
+define void @blah(i16* %tmp10) {
+entry:
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend_stret to void (i16*)*)( i16* sret %tmp10 )
+ ret void
+}
+
+declare i8* @objc_msgSend_stret(i8*, i8*, ...)
diff --git a/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll b/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
new file mode 100644
index 00000000000..2b89a9d721f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | grep "ashr"
+; PR1499
+
+define void @av_cmp_q_cond_true(i32* %retval, i32* %tmp9, i64* %tmp10) {
+newFuncRoot:
+ br label %cond_true
+
+return.exitStub: ; preds = %cond_true
+ ret void
+
+cond_true: ; preds = %newFuncRoot
+ %tmp30 = load i64, i64* %tmp10 ; <i64> [#uses=1]
+ %.cast = zext i32 63 to i64 ; <i64> [#uses=1]
+ %tmp31 = ashr i64 %tmp30, %.cast ; <i64> [#uses=1]
+ %tmp3132 = trunc i64 %tmp31 to i32 ; <i32> [#uses=1]
+ %tmp33 = or i32 %tmp3132, 1 ; <i32> [#uses=1]
+ store i32 %tmp33, i32* %tmp9
+ %tmp34 = load i32, i32* %tmp9 ; <i32> [#uses=1]
+ store i32 %tmp34, i32* %retval
+ br label %return.exitStub
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
new file mode 100644
index 00000000000..b2b04d6bd2f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep "ret i1 true"
+; rdar://5278853
+
+define i1 @test(i32 %tmp468) {
+ %tmp470 = udiv i32 %tmp468, 4 ; <i32> [#uses=2]
+ %tmp475 = icmp ult i32 %tmp470, 1073741824 ; <i1> [#uses=1]
+ ret i1 %tmp475
+}
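+; Why the RUN line expects "ret i1 true": %tmp468 udiv 4 is at most
+; 1073741823 (0x3FFFFFFF), which is always less than 1073741824, so the
+; whole function should fold to:
+;   ret i1 true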
+
diff --git a/llvm/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll b/llvm/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
new file mode 100644
index 00000000000..3f76187b780
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR1594
+
+define i64 @test(i16 %tmp510, i16 %tmp512) {
+ %W = sext i16 %tmp510 to i32 ; <i32> [#uses=1]
+ %X = sext i16 %tmp512 to i32 ; <i32> [#uses=1]
+ %Y = add i32 %W, %X ; <i32> [#uses=1]
+ %Z = sext i32 %Y to i64 ; <i64> [#uses=1]
+ ret i64 %Z
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
new file mode 100644
index 00000000000..c303ddd5897
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | grep icmp
+; PR1646
+
+@__gthrw_pthread_cancel = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
+@__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*) ; <i8**> [#uses=1]
+define weak i32 @pthread_cancel(i32) {
+ ret i32 0
+}
+
+define i1 @__gthread_active_p() {
+entry:
+ %tmp1 = load i8*, i8** @__gthread_active_ptr.5335, align 4 ; <i8*> [#uses=1]
+ %tmp2 = icmp ne i8* %tmp1, null ; <i1> [#uses=1]
+ ret i1 %tmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/llvm/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
new file mode 100644
index 00000000000..7c6df1f984a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | grep icmp
+; PR1678
+
+@A = weak alias void (), void ()* @B ; <void ()*> [#uses=1]
+
+define weak void @B() {
+ ret void
+}
+
+define i32 @active() {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp1 = icmp ne void ()* @A, null ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp12
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
new file mode 100644
index 00000000000..427d0e3b0d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | not grep call
+; RUN: opt < %s -O3 -S | not grep xyz
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+@.str = internal constant [4 x i8] c"xyz\00" ; <[4 x i8]*> [#uses=1]
+
+define void @foo(i8* %P) {
+entry:
+ %P_addr = alloca i8*
+ store i8* %P, i8** %P_addr
+ %tmp = load i8*, i8** %P_addr, align 4
+ %tmp1 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i1 false)
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2007-10-12-Crash.ll b/llvm/test/Transforms/InstCombine/2007-10-12-Crash.ll
new file mode 100644
index 00000000000..33d55e7e9e0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-10-12-Crash.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -instcombine -disable-output
+
+ %struct.Ray = type { %struct.Vec, %struct.Vec }
+ %struct.Scene = type { i32 (...)** }
+ %struct.Vec = type { double, double, double }
+
+declare double @_Z9ray_traceRK3VecRK3RayRK5Scene(%struct.Vec*, %struct.Ray*, %struct.Scene*)
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ %tmp3 = alloca %struct.Ray, align 4 ; <%struct.Ray*> [#uses=2]
+ %tmp97 = icmp slt i32 0, 512 ; <i1> [#uses=1]
+ br i1 %tmp97, label %bb71, label %bb108
+
+bb29: ; preds = %bb62
+ %tmp322 = bitcast %struct.Ray* %tmp3 to %struct.Vec* ; <%struct.Vec*> [#uses=1]
+ %tmp322.0 = getelementptr %struct.Vec, %struct.Vec* %tmp322, i32 0, i32 0 ; <double*> [#uses=1]
+ store double 0.000000e+00, double* %tmp322.0
+ %tmp57 = call double @_Z9ray_traceRK3VecRK3RayRK5Scene( %struct.Vec* null, %struct.Ray* %tmp3, %struct.Scene* null ) ; <double> [#uses=0]
+ br label %bb62
+
+bb62: ; preds = %bb71, %bb29
+ %tmp65 = icmp slt i32 0, 4 ; <i1> [#uses=1]
+ br i1 %tmp65, label %bb29, label %bb68
+
+bb68: ; preds = %bb62
+ ret i32 0
+
+bb71: ; preds = %entry
+ %tmp74 = icmp slt i32 0, 4 ; <i1> [#uses=1]
+ br i1 %tmp74, label %bb62, label %bb77
+
+bb77: ; preds = %bb71
+ ret i32 0
+
+bb108: ; preds = %entry
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/llvm/test/Transforms/InstCombine/2007-10-28-stacksave.ll
new file mode 100644
index 00000000000..48e876510a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -instcombine -S | grep "call.*stacksave"
+; PR1745
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+@p = weak global i8* null ; <i8**> [#uses=1]
+
+define i32 @main() {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ br label %lab
+
+lab: ; preds = %cleanup31, %entry
+ %n.0 = phi i32 [ 0, %entry ], [ %tmp25, %cleanup31 ] ; <i32> [#uses=2]
+ %tmp2 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=2]
+ %tmp4 = srem i32 %n.0, 47 ; <i32> [#uses=1]
+ %tmp5 = add i32 %tmp4, 1 ; <i32> [#uses=5]
+ %tmp7 = sub i32 %tmp5, 1 ; <i32> [#uses=0]
+ %tmp89 = zext i32 %tmp5 to i64 ; <i64> [#uses=1]
+ %tmp10 = mul i64 %tmp89, 32 ; <i64> [#uses=0]
+ %tmp12 = mul i32 %tmp5, 4 ; <i32> [#uses=0]
+ %tmp1314 = zext i32 %tmp5 to i64 ; <i64> [#uses=1]
+ %tmp15 = mul i64 %tmp1314, 32 ; <i64> [#uses=0]
+ %tmp17 = mul i32 %tmp5, 4 ; <i32> [#uses=1]
+ %tmp18 = alloca i8, i32 %tmp17 ; <i8*> [#uses=1]
+ %tmp1819 = bitcast i8* %tmp18 to i32* ; <i32*> [#uses=2]
+ %tmp21 = getelementptr i32, i32* %tmp1819, i32 0 ; <i32*> [#uses=1]
+ store i32 1, i32* %tmp21, align 4
+ %tmp2223 = bitcast i32* %tmp1819 to i8* ; <i8*> [#uses=1]
+ store volatile i8* %tmp2223, i8** @p, align 4
+ %tmp25 = add i32 %n.0, 1 ; <i32> [#uses=2]
+ %tmp27 = icmp sle i32 %tmp25, 999999 ; <i1> [#uses=1]
+ %tmp2728 = zext i1 %tmp27 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp2728, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cleanup31, label %cond_next
+
+cond_next: ; preds = %lab
+ call void @llvm.stackrestore( i8* %tmp2 )
+ ret i32 0
+
+cleanup31: ; preds = %lab
+ call void @llvm.stackrestore( i8* %tmp2 )
+ br label %lab
+}
+
+declare i8* @llvm.stacksave()
+
+declare void @llvm.stackrestore(i8*)
diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
new file mode 100644
index 00000000000..e42e5adf2e9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -disable-output
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
+target triple = "powerpc-unknown-linux-gnu"
+
+define i32 @test() {
+entry:
+ %tmp50.i17 = icmp slt i32 0, 4 ; <i1> [#uses=1]
+ br i1 %tmp50.i17, label %bb.i, label %calculateColorSpecificBlackLevel.exit
+
+bb.i: ; preds = %entry
+ br label %bb51.i.i
+
+bb27.i.i: ; preds = %bb51.i.i
+ %tmp31.i.i = load i16, i16* null, align 2 ; <i16> [#uses=2]
+ %tmp35.i.i = icmp ult i16 %tmp31.i.i, 1 ; <i1> [#uses=1]
+ %tmp41.i.i = icmp ugt i16 %tmp31.i.i, -1 ; <i1> [#uses=1]
+ %bothcond.i.i = or i1 %tmp35.i.i, %tmp41.i.i ; <i1> [#uses=1]
+ %bothcond1.i.i = zext i1 %bothcond.i.i to i32 ; <i32> [#uses=1]
+ %tmp46.i.i = xor i32 %bothcond1.i.i, 1 ; <i32> [#uses=1]
+ %count.0.i.i = add i32 %count.1.i.i, %tmp46.i.i ; <i32> [#uses=1]
+ %tmp50.i.i = add i32 %x.0.i.i, 2 ; <i32> [#uses=1]
+ br label %bb51.i.i
+
+bb51.i.i: ; preds = %bb27.i.i, %bb.i
+ %count.1.i.i = phi i32 [ %count.0.i.i, %bb27.i.i ], [ 0, %bb.i ] ; <i32> [#uses=1]
+ %x.0.i.i = phi i32 [ %tmp50.i.i, %bb27.i.i ], [ 0, %bb.i ] ; <i32> [#uses=2]
+ %tmp54.i.i = icmp slt i32 %x.0.i.i, 0 ; <i1> [#uses=1]
+ br i1 %tmp54.i.i, label %bb27.i.i, label %bb57.i.i
+
+bb57.i.i: ; preds = %bb51.i.i
+ ret i32 0
+
+calculateColorSpecificBlackLevel.exit: ; preds = %entry
+ ret i32 undef
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
new file mode 100644
index 00000000000..876cdd53f52
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -disable-output
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+declare void @__darwin_gcc3_preregister_frame_info()
+
+define void @_start(i32 %argc, i8** %argv, i8** %envp) {
+entry:
+ %tmp1 = bitcast void ()* @__darwin_gcc3_preregister_frame_info to i32* ; <i32*> [#uses=1]
+ %tmp2 = load i32, i32* %tmp1, align 4 ; <i32> [#uses=1]
+ %tmp3 = icmp ne i32 %tmp2, 0 ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp34, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %return
+
+cond_true: ; preds = %entry
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll b/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
new file mode 100644
index 00000000000..ff3107297a1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR1780
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+%opaque_t = type opaque
+%opaque2 = type opaque
+%op_ts = type {%opaque2, i32}
+
+@g = external global %opaque_t
+@h = external global %op_ts
+
+define i32 @foo() {
+entry:
+ %x = load i8, i8* bitcast (%opaque_t* @g to i8*)
+ %y = load i32, i32* bitcast (%op_ts* @h to i32*)
+ %z = zext i8 %x to i32
+ %r = add i32 %y, %z
+ ret i32 %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
new file mode 100644
index 00000000000..6b83dd982dc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 1"
+; PR1800
+
+define i1 @test(i32 %In) {
+ %c1 = icmp sgt i32 %In, -1
+ %c2 = icmp eq i32 %In, 1
+ %V = and i1 %c1, %c2
+ ret i1 %V
+}
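+; Rough sketch of the expected fold: %In == 1 already implies %In > -1, so
+; the 'and' simplifies to the equality compare the RUN line greps for:
+;   %c2 = icmp eq i32 %In, 1
+;   ret i1 %c2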
+
diff --git a/llvm/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll b/llvm/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
new file mode 100644
index 00000000000..1232005a561
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | not grep bitcast
+; PR1716
+
+@.str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ %tmp32 = tail call i32 (i8* , ...) bitcast (i32 (i8*, ...) * @printf to i32 (i8* , ...) *)( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0) , i32 0 ) nounwind ; <i32> [#uses=0]
+ ret i32 undef
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll b/llvm/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
new file mode 100644
index 00000000000..89f867252bf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
@@ -0,0 +1,9 @@
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+; RUN: opt < %s -instcombine -S | not grep "ret i1 0"
+; PR1850
+
+define i1 @test() {
+ %cond = icmp ule i8* inttoptr (i64 4294967297 to i8*), inttoptr (i64 5 to i8*)
+ ret i1 %cond
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-12-12-GEPScale.ll b/llvm/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
new file mode 100644
index 00000000000..60f715e2ef7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | not grep 1431655764
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+
+define i8* @foo([100 x {i8,i8,i8}]* %x) {
+entry:
+ %p = bitcast [100 x {i8,i8,i8}]* %x to i8*
+ %q = getelementptr i8, i8* %p, i32 -4
+ ret i8* %q
+}
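+; Why 1431655764 is the forbidden constant: the -4 byte offset, taken as an
+; unsigned 32-bit value, is 4294967292, and 4294967292 / 3 = 1431655764.
+; That value would show up if the byte offset were wrongly rescaled by the
+; 3-byte element size, so the RUN line checks that it never appears.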
diff --git a/llvm/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll b/llvm/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
new file mode 100644
index 00000000000..85cf9b6904c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -S | grep nounwind
+
+define void @bar() {
+entry:
+ call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"( )
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll b/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
new file mode 100644
index 00000000000..62fb4132d5d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @foo(i32 %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[T15:%.*]] = sub i32 99, [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[T15]], 0
+; CHECK-NEXT: [[A_OP:%.*]] = add i32 [[A]], 1
+; CHECK-NEXT: [[T13:%.*]] = select i1 [[TMP1]], i32 100, i32 [[A_OP]]
+; CHECK-NEXT: ret i32 [[T13]]
+;
+ %t15 = sub i32 99, %a
+ %t16 = icmp slt i32 %t15, 0
+ %smax = select i1 %t16, i32 0, i32 %t15
+ %t12 = add i32 %smax, %a
+ %t13 = add i32 %t12, 1
+ ret i32 %t13
+}
+
+define i32 @bar(i32 %a) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[T15:%.*]] = sub i32 99, [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[T15]], 0
+; CHECK-NEXT: [[T12:%.*]] = select i1 [[TMP1]], i32 99, i32 [[A]]
+; CHECK-NEXT: ret i32 [[T12]]
+;
+ %t15 = sub i32 99, %a
+ %t16 = icmp slt i32 %t15, 0
+ %smax = select i1 %t16, i32 0, i32 %t15
+ %t12 = add i32 %smax, %a
+ ret i32 %t12
+}
+
+define i32 @fun(i32 %a) {
+; CHECK-LABEL: @fun(
+; CHECK-NEXT: [[T16:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: [[T12:%.*]] = select i1 [[T16]], i32 [[A]], i32 99
+; CHECK-NEXT: ret i32 [[T12]]
+;
+ %t15 = sub i32 99, %a
+ %t16 = icmp slt i32 %a, 0
+ %smax = select i1 %t16, i32 0, i32 %t15
+ %t12 = add i32 %smax, %a
+ ret i32 %t12
+}
diff --git a/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll b/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
new file mode 100644
index 00000000000..7260c001b0d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -instcombine -S | grep "ret i32 1" | count 8
+
+define i32 @test1() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp ule i32 %sub, 0
+ %retval = select i1 %cmp, i32 0, i32 1
+ ret i32 %retval
+}
+
+define i32 @test2() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp ugt i32 %sub, 0
+ %retval = select i1 %cmp, i32 1, i32 0
+ ret i32 %retval
+}
+
+define i32 @test3() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp slt i32 %sub, 0
+ %retval = select i1 %cmp, i32 1, i32 0
+ ret i32 %retval
+}
+
+define i32 @test4() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp sle i32 %sub, 0
+ %retval = select i1 %cmp, i32 1, i32 0
+ ret i32 %retval
+}
+
+define i32 @test5() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp sge i32 %sub, 0
+ %retval = select i1 %cmp, i32 0, i32 1
+ ret i32 %retval
+}
+
+define i32 @test6() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp sgt i32 %sub, 0
+ %retval = select i1 %cmp, i32 0, i32 1
+ ret i32 %retval
+}
+
+define i32 @test7() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp eq i32 %sub, 0
+ %retval = select i1 %cmp, i32 0, i32 1
+ ret i32 %retval
+}
+
+define i32 @test8() {
+entry:
+ %z = alloca i32
+ store i32 0, i32* %z
+ %tmp = load i32, i32* %z
+ %sub = sub i32 %tmp, 1
+ %cmp = icmp ne i32 %sub, 0
+ %retval = select i1 %cmp, i32 1, i32 0
+ ret i32 %retval
+}
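+; Why every function should fold to "ret i32 1": after mem2reg the loaded
+; value is the stored 0, so %sub is 0 - 1 = -1 (0xFFFFFFFF). Plugging -1 into
+; each compare (e.g. "icmp ule i32 -1, 0" is false, "icmp slt i32 -1, 0" is
+; true) makes every select pick the constant 1.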
diff --git a/llvm/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll b/llvm/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
new file mode 100644
index 00000000000..22c078250ec
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
@@ -0,0 +1,30 @@
+; Ignore stderr; we expect warnings there.
+; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
+
+; CHECK-NOT: bitcast
+
+define void @a() {
+ ret void
+}
+
+define signext i32 @b(i32* inreg %x) {
+ ret i32 0
+}
+
+define void @c(...) {
+ ret void
+}
+
+define void @g(i32* %y) {
+; CHECK-LABEL: @g(
+; CHECK: call i64 bitcast (i32 (i32*)* @b to i64 (i32)*)(i32 0)
+ %x = call i64 bitcast (i32 (i32*)* @b to i64 (i32)*)( i32 0 ) ; <i64> [#uses=0]
+
+; The rest should not have bitcasts remaining
+; CHECK-NOT: bitcast
+ call void bitcast (void ()* @a to void (i32*)*)( i32* noalias %y )
+ call <2 x i32> bitcast (i32 (i32*)* @b to <2 x i32> (i32*)*)( i32* inreg null ) ; <<2 x i32>>:1 [#uses=0]
+ call void bitcast (void (...)* @c to void (i32)*)( i32 0 )
+ call void bitcast (void (...)* @c to void (i32)*)( i32 zeroext 0 )
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-06-CastCrash.ll b/llvm/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
new file mode 100644
index 00000000000..097a0ce849d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define <2 x i32> @f() {
+ ret <2 x i32> undef
+}
+
+define i32 @g() {
+ %x = call i32 bitcast (<2 x i32> ()* @f to i32 ()*)( ) ; <i32> [#uses=1]
+ ret i32 %x
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-06-VoidCast.ll b/llvm/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
new file mode 100644
index 00000000000..5dcaa38edc0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @f(i16 %y) {
+ ret void
+}
+
+define i32 @g(i32 %y) {
+; CHECK-LABEL: @g(
+; CHECK: call i32 bitcast
+ %x = call i32 bitcast (void (i16)* @f to i32 (i32)*)( i32 %y ) ; <i32> [#uses=1]
+ ret i32 %x
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
new file mode 100644
index 00000000000..fbc8ba972a0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep and
+; PR1907
+
+define i1 @test(i32 %c84.17) {
+ %tmp2696 = icmp ne i32 %c84.17, 34 ; <i1> [#uses=2]
+ %tmp2699 = icmp sgt i32 %c84.17, -1 ; <i1> [#uses=1]
+ %tmp2703 = and i1 %tmp2696, %tmp2699 ; <i1> [#uses=1]
+ ret i1 %tmp2703
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll b/llvm/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
new file mode 100644
index 00000000000..b111b85d59f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | grep zeroext
+
+ %struct.FRAME.nest = type { i32, i32 (...)* }
+ %struct.__builtin_trampoline = type { [10 x i8] }
+
+declare void @llvm.init.trampoline(i8*, i8*, i8*) nounwind
+declare i8* @llvm.adjust.trampoline(i8*) nounwind
+
+declare i32 @f(%struct.FRAME.nest* nest , ...)
+
+define i32 @nest(i32 %n) {
+entry:
+ %FRAME.0 = alloca %struct.FRAME.nest, align 8 ; <%struct.FRAME.nest*> [#uses=3]
+ %TRAMP.216 = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
+ %TRAMP.216.sub = getelementptr [10 x i8], [10 x i8]* %TRAMP.216, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp3 = getelementptr %struct.FRAME.nest, %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 %n, i32* %tmp3, align 8
+ %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
+ call void @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, ...)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
+ %tramp = call i8* @llvm.adjust.trampoline( i8* %TRAMP.216.sub)
+ %tmp7 = getelementptr %struct.FRAME.nest, %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (...)**> [#uses=1]
+ %tmp89 = bitcast i8* %tramp to i32 (...)* ; <i32 (...)*> [#uses=2]
+ store i32 (...)* %tmp89, i32 (...)** %tmp7, align 8
+ %tmp2.i = call i32 (...) %tmp89( i32 zeroext 0 ) ; <i32> [#uses=1]
+ ret i32 %tmp2.i
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
new file mode 100644
index 00000000000..5ff23a3881f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR1940
+
+define i1 @test1(i8 %A, i8 %B) {
+ %a = zext i8 %A to i32
+ %b = zext i8 %B to i32
+ %c = icmp sgt i32 %a, %b
+ ret i1 %c
+; CHECK: %c = icmp ugt i8 %A, %B
+; CHECK: ret i1 %c
+}
+
+define i1 @test2(i8 %A, i8 %B) {
+ %a = sext i8 %A to i32
+ %b = sext i8 %B to i32
+ %c = icmp ugt i32 %a, %b
+ ret i1 %c
+; CHECK: %c = icmp ugt i8 %A, %B
+; CHECK: ret i1 %c
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
new file mode 100644
index 00000000000..87c2b75d249
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i16 @test1(i16 %a) {
+ %tmp = zext i16 %a to i32 ; <i32> [#uses=2]
+ %tmp21 = lshr i32 %tmp, 8 ; <i32> [#uses=1]
+; CHECK: %tmp21 = lshr i16 %a, 8
+ %tmp5 = mul i32 %tmp, 5 ; <i32> [#uses=1]
+; CHECK: %tmp5 = mul i16 %a, 5
+ %tmp.upgrd.32 = or i32 %tmp21, %tmp5 ; <i32> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16 ; <i16> [#uses=1]
+ ret i16 %tmp.upgrd.3
+; CHECK: ret i16 %tmp.upgrd.32
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll b/llvm/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
new file mode 100644
index 00000000000..6b4e89dbbe6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: select
+
+define double @fold(i1 %a, double %b) {
+%s = select i1 %a, double 0., double 1.
+%c = fdiv double %b, %s
+ret double %c
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-02-13-MulURem.ll b/llvm/test/Transforms/InstCombine/2008-02-13-MulURem.ll
new file mode 100644
index 00000000000..d85ef97553a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-02-13-MulURem.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR1933
+
+; CHECK: rem
+
+define i32 @fold(i32 %a) {
+ %s = mul i32 %a, 3
+ %c = urem i32 %s, 3
+ ret i32 %c
+}
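+; Why the urem must stay (CHECK: rem): the multiply can wrap, so (%a * 3)
+; urem 3 is not always 0. For example, with %a = 1431655766 the product is
+; 4294967298, which wraps to 2 in i32, and 2 urem 3 = 2.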
diff --git a/llvm/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll b/llvm/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
new file mode 100644
index 00000000000..854f8cb0b56
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep "sdiv i8 \%a, 9"
+; PR2048
+
+define i8 @i(i8 %a) {
+ %tmp1 = sdiv i8 %a, -3
+ %tmp2 = sdiv i8 %tmp1, -3
+ ret i8 %tmp2
+}
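+; Rough sketch of the expected fold (value names may differ): two signed
+; divisions by -3 combine into one division by 9, since the product of the
+; divisors fits in i8, matching the RUN line:
+;   %tmp2 = sdiv i8 %a, 9
+;   ret i8 %tmp2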
+
diff --git a/llvm/test/Transforms/InstCombine/2008-02-23-MulSub.ll b/llvm/test/Transforms/InstCombine/2008-02-23-MulSub.ll
new file mode 100644
index 00000000000..bb21c4b0341
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-02-23-MulSub.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep mul
+
+define i26 @test(i26 %a) nounwind {
+entry:
+ %_add = mul i26 %a, 2885 ; <i26> [#uses=1]
+ %_shl2 = mul i26 %a, 2884 ; <i26> [#uses=1]
+ %_sub = sub i26 %_add, %_shl2 ; <i26> [#uses=1]
+ ret i26 %_sub
+}
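+; Rough sketch of the expected result: %a*2885 - %a*2884 is just %a*1, so no
+; multiply should remain and the function should reduce to:
+;   ret i26 %a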
diff --git a/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
new file mode 100644
index 00000000000..7f8bd4fb8a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; rdar://5771353
+
+define float @test(float %x, x86_fp80 %y) nounwind readonly {
+entry:
+ %tmp67 = fcmp uno x86_fp80 %y, 0xK00000000000000000000 ; <i1> [#uses=1]
+ %tmp71 = fcmp uno float %x, 0.000000e+00 ; <i1> [#uses=1]
+ %bothcond = or i1 %tmp67, %tmp71 ; <i1> [#uses=1]
+ br i1 %bothcond, label %bb74, label %bb80
+
+bb74: ; preds = %entry
+ ret float 0.000000e+00
+
+bb80: ; preds = %entry
+ ret float 0.000000e+00
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll b/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
new file mode 100644
index 00000000000..d086f4b63b7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep "16" | count 1
+
+define i8* @bork(i8** %qux) {
+ %tmp275 = load i8*, i8** %qux, align 1
+ %tmp275276 = ptrtoint i8* %tmp275 to i32
+ %tmp277 = add i32 %tmp275276, 16
+ %tmp277278 = inttoptr i32 %tmp277 to i8*
+ ret i8* %tmp277278
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll b/llvm/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
new file mode 100644
index 00000000000..1ea0998bf70
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
@@ -0,0 +1,15 @@
+;; The bitcast cannot be eliminated because byval arguments need
+;; the correct type, or at least a type of the correct size.
+; RUN: opt < %s -instcombine -S | grep bitcast
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+ %struct.NSRect = type { [4 x float] }
+
+define void @foo(i8* %context) nounwind {
+entry:
+ %tmp1 = bitcast i8* %context to %struct.NSRect* ; <%struct.NSRect*> [#uses=1]
+ call void (i32, ...) @bar( i32 3, %struct.NSRect* byval align 4 %tmp1 ) nounwind
+ ret void
+}
+
+declare void @bar(i32, ...)
diff --git a/llvm/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/llvm/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
new file mode 100644
index 00000000000..dba6cdb5654
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | grep "store volatile"
+
+define void @test() {
+ %votf = alloca <4 x float> ; <<4 x float>*> [#uses=1]
+ store volatile <4 x float> zeroinitializer, <4 x float>* %votf, align 16
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
new file mode 100644
index 00000000000..af662bda1e6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@g_1 = internal global i32 0 ; <i32*> [#uses=3]
+
+define i32 @main() nounwind {
+entry:
+ %tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
+ %tmp34 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
+ %tmp3.reg2mem.0 = phi i32 [ %tmp34, %entry ], [ %tmp3, %bb ] ; <i32> [#uses=1]
+ %tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
+ store volatile i32 %tmp4, i32* @g_1, align 4
+ %tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
+ %tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
+ %tmp3 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br i1 %tmp9, label %bb, label %bb11
+
+bb11: ; preds = %bb
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
new file mode 100644
index 00000000000..3c67e513bcc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
+; PR2262
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@g_1 = internal global i32 0 ; <i32*> [#uses=3]
+
+define i32 @main(i32 %i) nounwind {
+entry:
+ %tmp93 = icmp slt i32 %i, 10 ; <i1> [#uses=0]
+ %tmp34 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br i1 %tmp93, label %bb11, label %bb
+
+bb: ; preds = %bb, %entry
+ %tmp3 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br label %bb11
+
+bb11: ; preds = %bb
+ %tmp4 = phi i32 [ %tmp34, %entry ], [ %tmp3, %bb ] ; <i32> [#uses=1]
+ ret i32 %tmp4
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll b/llvm/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
new file mode 100644
index 00000000000..907382093d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -S | grep "store i8" | count 3
+; PR2297
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define i32 @a() nounwind {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp1 = call i8* @malloc( i32 10 ) nounwind ; <i8*> [#uses=5]
+ %tmp3 = getelementptr i8, i8* %tmp1, i32 1 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp3, align 1
+ %tmp5 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
+ store i8 1, i8* %tmp5, align 1
+ %tmp7 = call i32 @strlen( i8* %tmp1 ) nounwind readonly ; <i32> [#uses=1]
+ %tmp9 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp9, align 1
+ %tmp11 = call i32 (...) @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
+ ret i32 %tmp7
+}
+
+declare i8* @malloc(i32) nounwind
+
+declare i32 @strlen(i8*) nounwind readonly
+
+declare i32 @b(...)
diff --git a/llvm/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/llvm/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
new file mode 100644
index 00000000000..ce1923338c6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; PR2297
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define i32 @a() nounwind {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp1 = call i8* @malloc( i32 10 ) nounwind ; <i8*> [#uses=5]
+ %tmp3 = getelementptr i8, i8* %tmp1, i32 1 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp3, align 1
+ %tmp5 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
+ store i8 1, i8* %tmp5, align 1
+; CHECK: store
+; CHECK: store
+; CHECK-NEXT: strlen
+; CHECK-NEXT: store
+ %tmp7 = call i32 @strlen( i8* %tmp1 ) nounwind readonly ; <i32> [#uses=1]
+ %tmp9 = getelementptr i8, i8* %tmp1, i32 0 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp9, align 1
+ %tmp11 = call i32 (...) @b( i8* %tmp1 ) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret i32 %tmp7
+}
+
+declare i8* @malloc(i32) nounwind
+
+declare i32 @strlen(i8*) nounwind readonly
+
+declare i32 @b(...)
diff --git a/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll b/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
new file mode 100644
index 00000000000..4d9c19ff583
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR2303
+ %"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
+ %"struct.std::locale::facet" = type { i32 (...)**, i32 }
+
+declare i32* @_ZNSt6locale5facet15_S_get_c_localeEv()
+
+declare i32** @__ctype_toupper_loc() readnone
+
+declare i32** @__ctype_tolower_loc() readnone
+
+define void @_ZNSt5ctypeIcEC2EPiPKtbm(%"struct.std::ctype<char>"* %this, i32* %unnamed_arg, i16* %__table, i8 zeroext %__del, i64 %__refs) personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ %tmp8 = invoke i32* @_ZNSt6locale5facet15_S_get_c_localeEv( )
+ to label %invcont unwind label %lpad ; <i32*> [#uses=0]
+
+invcont: ; preds = %entry
+ %tmp32 = invoke i32** @__ctype_toupper_loc( ) readnone
+ to label %invcont31 unwind label %lpad ; <i32**> [#uses=0]
+
+invcont31: ; preds = %invcont
+ %tmp38 = invoke i32** @__ctype_tolower_loc( ) readnone
+ to label %invcont37 unwind label %lpad ; <i32**> [#uses=1]
+
+invcont37: ; preds = %invcont31
+ %tmp39 = load i32*, i32** %tmp38, align 8 ; <i32*> [#uses=1]
+ %tmp41 = getelementptr %"struct.std::ctype<char>", %"struct.std::ctype<char>"* %this, i32 0, i32 4 ; <i32**> [#uses=1]
+ store i32* %tmp39, i32** %tmp41, align 8
+ ret void
+
+lpad: ; preds = %invcont31, %invcont, %entry
+ %exn = landingpad {i8*, i32}
+ cleanup
+ unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll b/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
new file mode 100644
index 00000000000..af0f2a45e7e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR2339
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+declare void @BZALLOC(i32)
+
+define void @f(i32) {
+entry:
+ %blockSize100k = alloca i32 ; <i32*> [#uses=2]
+ store i32 %0, i32* %blockSize100k
+ %n = alloca i32 ; <i32*> [#uses=2]
+ load i32, i32* %blockSize100k ; <i32>:1 [#uses=1]
+ store i32 %1, i32* %n
+ load i32, i32* %n ; <i32>:2 [#uses=1]
+ add i32 %2, 2 ; <i32>:3 [#uses=1]
+ mul i32 %3, ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i32) ; <i32>:4 [#uses=1]
+ call void @BZALLOC( i32 %4 )
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll b/llvm/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
new file mode 100644
index 00000000000..a0e95a93982
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -S | grep "ret i1 false" | count 2
+; PR2329
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i1 @f1() {
+ ret i1 icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
+}
+
+define i1 @f2() {
+ ret i1 icmp eq (i8* inttoptr (i16 1 to i8*), i8* inttoptr (i16 2 to i8*))
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-05-22-IDivVector.ll b/llvm/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
new file mode 100644
index 00000000000..f7ba99c6b44
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -instcombine -disable-output
+
+define <3 x i8> @f(<3 x i8> %i) {
+ %A = sdiv <3 x i8> %i, %i
+ ret <3 x i8> %A
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
new file mode 100644
index 00000000000..b10aac96599
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; PR2359
+
+; CHECK-LABEL: @f(
+; CHECK: ret i1 false
+define i1 @f(i8* %x) {
+entry:
+ %tmp462 = load i8, i8* %x, align 1 ; <i8> [#uses=1]
+ %tmp462463 = sitofp i8 %tmp462 to float ; <float> [#uses=1]
+ %tmp464 = fcmp ugt float %tmp462463, 0x47EFFFFFE0000000 ; <i1>
+ ret i1 %tmp464
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll
new file mode 100644
index 00000000000..31b17196d8f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-31-AddBool.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR2389
+
+; CHECK: xor
+
+define i1 @test(i1 %a, i1 %b) {
+ %A = add i1 %a, %b
+ ret i1 %A
+}
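+; Rough sketch of the expected fold: addition of i1 values is addition mod 2,
+; i.e. exclusive or, which is the xor the CHECK line looks for:
+;   %A = xor i1 %a, %b
+;   ret i1 %A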
diff --git a/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll b/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll
new file mode 100644
index 00000000000..7c33f2dd05e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-05-31-Bools.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S > %t
+; RUN: grep "xor" %t
+; RUN: grep "and" %t
+; RUN: not grep "div" %t
+
+define i1 @foo1(i1 %a, i1 %b) {
+ %A = sub i1 %a, %b
+ ret i1 %A
+}
+
+define i1 @foo2(i1 %a, i1 %b) {
+ %A = mul i1 %a, %b
+ ret i1 %A
+}
+
+define i1 @foo3(i1 %a, i1 %b) {
+ %A = udiv i1 %a, %b
+ ret i1 %A
+}
+
+define i1 @foo4(i1 %a, i1 %b) {
+ %A = sdiv i1 %a, %b
+ ret i1 %A
+}
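+; Rough sketch of the expected folds, based on the RUN lines: i1 subtraction
+; is addition mod 2, so foo1 becomes an xor; i1 multiplication is a logical
+; and, so foo2 becomes "and i1 %a, %b"; and since a zero divisor would be
+; undefined, the i1 divisions in foo3/foo4 presumably fold to just %a,
+; leaving no div instructions.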
diff --git a/llvm/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll b/llvm/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
new file mode 100644
index 00000000000..5e4a9d0e5b3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine
+
+define i65 @foo(i65 %x) nounwind {
+entry:
+ %tmp2 = ashr i65 %x, 65 ; <i65> [#uses=1]
+ ret i65 %tmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll b/llvm/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
new file mode 100644
index 00000000000..7e8341b99f7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -instcombine -S | grep "phi i32" | count 2
+
+define void @test() nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb16, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %indvar.next, %somebb ] ; <i32> [#uses=1]
+ %x.0 = phi i32 [ 37, %entry ], [ %tmp17, %somebb ] ; <i32> [#uses=1]
+ %tmp = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=1]
+ %tmp3 = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1]
+ br i1 %tmp3, label %bb7, label %bb5
+
+bb5: ; preds = %bb
+ %tmp6 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ br label %bb7
+
+bb7: ; preds = %bb5, %bb
+ %tmp8 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp9 = tail call i32 (...) @bork( ) nounwind ; <i32> [#uses=0]
+ %tmp11 = icmp eq i32 %x.0, 37 ; <i1> [#uses=1]
+ br i1 %tmp11, label %bb14, label %bb16
+
+bb14: ; preds = %bb7
+ %tmp15 = tail call i32 (...) @bar( ) nounwind ; <i32> [#uses=0]
+ br label %bb16
+
+bb16: ; preds = %bb14, %bb7
+ %tmp17 = tail call i32 (...) @zap( ) nounwind ; <i32> [#uses=1]
+ %indvar.next = add i32 %i.0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, 42 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %somebb
+
+somebb:
+ br label %bb
+
+return: ; preds = %bb16
+ ret void
+}
+
+declare i32 @bork(...)
+
+declare i32 @bar(...)
+
+declare i32 @zap(...)
diff --git a/llvm/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll b/llvm/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
new file mode 100644
index 00000000000..cc469262d53
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | grep "store i32" | count 2
+
+@g_139 = global i32 0 ; <i32*> [#uses=2]
+
+define void @func_56(i32 %p_60) nounwind {
+entry:
+ store i32 1, i32* @g_139, align 4
+ %tmp1 = icmp ne i32 %p_60, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %bb, label %return
+
+bb: ; preds = %bb, %entry
+ store i32 1, i32* @g_139, align 4
+ br label %bb
+
+return: ; preds = %entry
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll b/llvm/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
new file mode 100644
index 00000000000..bf5e96b763f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | grep "store i8" | count 2
+
+define i32 @a(i8* %s) nounwind {
+entry:
+ store i8 0, i8* %s, align 1 ; This store cannot be eliminated!
+ %tmp3 = call i32 @strlen( i8* %s ) nounwind readonly
+ %tmp5 = icmp ne i32 %tmp3, 0
+ br i1 %tmp5, label %bb, label %bb8
+
+bb: ; preds = %entry
+ store i8 0, i8* %s, align 1
+ br label %bb8
+
+bb8:
+ ret i32 %tmp3
+}
+
+declare i32 @strlen(i8*) nounwind readonly
+
diff --git a/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll b/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
new file mode 100644
index 00000000000..c3aab464b87
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | grep load | count 3
+; PR2471
+
+declare i32 @x(i32*)
+define i32 @b(i32* %a, i32* %b) {
+entry:
+ %tmp1 = load i32, i32* %a
+ %tmp3 = load i32, i32* %b
+ %add = add i32 %tmp1, %tmp3
+ %call = call i32 @x( i32* %a )
+ %tobool = icmp ne i32 %add, 0
+ ; not safe to turn into an uncond load
+ %cond = select i1 %tobool, i32* %b, i32* %a
+ %tmp8 = load i32, i32* %cond
+ ret i32 %tmp8
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
new file mode 100644
index 00000000000..80bd83bc6ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 15"
+; PR2479
+; (See also PR1800.)
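+; %In == 15 already implies %In ugt 13, so the 'and' of the two compares should
+; simplify to just the equality test (which the RUN line checks for).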
+
+define i1 @test(i32 %In) {
+ %c1 = icmp ugt i32 %In, 13
+ %c2 = icmp eq i32 %In, 15
+ %V = and i1 %c1, %c2
+ ret i1 %V
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/llvm/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
new file mode 100644
index 00000000000..f963b009671
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | grep "call.*llvm.stackrestore"
+; PR2488
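+; The loop allocates a fresh variable-sized alloca on every iteration, so the
+; stackrestore must stay paired with the stacksave; the RUN line checks it survives.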
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@p = weak global i8* null ; <i8**> [#uses=2]
+
+define i32 @main() nounwind {
+entry:
+ %tmp248 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp2752 = alloca i32 ; <i32*> [#uses=2]
+ %tmpcast53 = bitcast i32* %tmp2752 to i8* ; <i8*> [#uses=1]
+ store i32 2, i32* %tmp2752, align 4
+ store volatile i8* %tmpcast53, i8** @p, align 4
+ br label %bb44
+
+bb: ; preds = %bb44
+ ret i32 0
+
+bb44: ; preds = %bb44, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %tmp3857, %bb44 ] ; <i32> [#uses=1]
+ %tmp249 = phi i8* [ %tmp248, %entry ], [ %tmp2, %bb44 ] ; <i8*> [#uses=1]
+ %tmp3857 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ call void @llvm.stackrestore( i8* %tmp249 )
+ %tmp2 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp4 = srem i32 %tmp3857, 1000 ; <i32> [#uses=2]
+ %tmp5 = add i32 %tmp4, 1 ; <i32> [#uses=1]
+ %tmp27 = alloca i32, i32 %tmp5 ; <i32*> [#uses=3]
+ %tmpcast = bitcast i32* %tmp27 to i8* ; <i8*> [#uses=1]
+ store i32 1, i32* %tmp27, align 4
+ %tmp34 = getelementptr i32, i32* %tmp27, i32 %tmp4 ; <i32*> [#uses=1]
+ store i32 2, i32* %tmp34, align 4
+ store volatile i8* %tmpcast, i8** @p, align 4
+ %exitcond = icmp eq i32 %tmp3857, 999999 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb, label %bb44
+}
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll b/llvm/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
new file mode 100644
index 00000000000..b0a17467455
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR2330(i32 %a) {
+; CHECK-LABEL: @PR2330(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 %a, 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %tmp15 = shl i32 1, %a
+ %tmp237 = and i32 %tmp15, 1
+ %toBool = icmp eq i32 %tmp237, 0
+ ret i1 %toBool
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-07-08-SubAnd.ll b/llvm/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
new file mode 100644
index 00000000000..a3d44cb2467
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep -v "i32 8"
+; PR2330
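+; (8 - %a) & 7 equals (-%a) & 7 because 8 is 0 modulo 8, so the literal 8 should
+; not survive instcombine.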
+
+define i32 @a(i32 %a) nounwind {
+entry:
+ %tmp2 = sub i32 8, %a ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp2, 7 ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
new file mode 100644
index 00000000000..17ec9cd1d82
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2
+; PR2496
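+; Both volatile loads of @g_1 must remain; volatile accesses may not be merged or removed.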
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@g_1 = internal global i32 0 ; <i32*> [#uses=3]
+
+define i32 @main() nounwind {
+entry:
+ %tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
+ %tmp34 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
+ %tmp3.reg2mem.0 = phi i32 [ %tmp3, %bb ], [ %tmp34, %entry ]
+ %tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
+ store volatile i32 %tmp4, i32* @g_1, align 4
+ %tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
+ %tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
+ %tmp3 = load volatile i32, i32* @g_1, align 4 ; <i32> [#uses=1]
+ br i1 %tmp9, label %bb, label %bb11
+
+bb11: ; preds = %bb
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-07-09-SubAndError.ll b/llvm/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
new file mode 100644
index 00000000000..ed0141403bb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep "sub i32 0"
+; PR2330
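+; Unlike the SubAnd test, 5 is not 0 modulo 4, so folding the sub into
+; 'sub i32 0, %a' would change the bits selected by the 'and' mask.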
+
+define i32 @foo(i32 %a) nounwind {
+entry:
+ %A = sub i32 5, %a
+ %B = and i32 %A, 2
+ ret i32 %B
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll b/llvm/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
new file mode 100644
index 00000000000..a9fa53d3999
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR2539_A(i1 %A) {
+; CHECK-LABEL: @PR2539_A(
+; CHECK-NEXT: [[C:%.*]] = xor i1 %A, true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = zext i1 %A to i32
+ %C = icmp slt i32 %B, 1
+ ret i1 %C
+}
+
+
+define i1 @PR2539_B(i1 zeroext %b) {
+; CHECK-LABEL: @PR2539_B(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i1 %b, true
+ ret i1 %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
new file mode 100644
index 00000000000..bf53451d66c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep rem
+; PR2330
+
+define i32 @a(i32 %b) nounwind {
+entry:
+ srem i32 %b, 8 ; <i32>:0 [#uses=1]
+ and i32 %0, 1 ; <i32>:1 [#uses=1]
+ ret i32 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-07-13-DivZero.ll b/llvm/test/Transforms/InstCombine/2008-07-13-DivZero.ll
new file mode 100644
index 00000000000..18c99542834
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-13-DivZero.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | grep "lshr.*3"
+; RUN: opt < %s -instcombine -S | grep "call .*%cond"
+; PR2506
+
+; We can simplify the operand of udiv to '8', but not the operand to the
+; call. If the callee never returns, we can't assume the div is reachable.
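+; With %cond known nonzero in the udiv, 'udiv i32 %x, 8' becomes 'lshr i32 %x, 3'
+; (the first RUN line), while the call must keep %cond as its argument (the second).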
+define i32 @a(i32 %x, i32 %y) {
+entry:
+ %tobool = icmp ne i32 %y, 0 ; <i1> [#uses=1]
+ %cond = select i1 %tobool, i32 8, i32 0 ; <i32> [#uses=2]
+ %call = call i32 @b( i32 %cond ) ; <i32> [#uses=0]
+ %div = udiv i32 %x, %cond ; <i32> [#uses=1]
+ ret i32 %div
+}
+
+declare i32 @b(i32)
diff --git a/llvm/test/Transforms/InstCombine/2008-07-16-fsub.ll b/llvm/test/Transforms/InstCombine/2008-07-16-fsub.ll
new file mode 100644
index 00000000000..672b4e95526
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-07-16-fsub.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | grep sub
+; PR2553
+
+define double @test(double %X) nounwind {
+ ; fsub of self can't be optimized away.
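+ ; %X - %X is NaN rather than 0.0 when %X is NaN or infinite, so the fold is only
+ ; legal with no-NaNs/no-infs style fast-math information.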
+ %Y = fsub double %X, %X
+ ret double %Y
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll
new file mode 100644
index 00000000000..91f1c0b0a98
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | not grep or
+; PR2629
+
+define void @f(i8* %x) nounwind {
+entry:
+ br label %bb
+
+bb:
+ %g1 = getelementptr i8, i8* %x, i32 0
+ %l1 = load i8, i8* %g1, align 1
+ %s1 = sub i8 %l1, 6
+ %c1 = icmp ugt i8 %s1, 2
+ %s2 = sub i8 %l1, 10
+ %c2 = icmp ugt i8 %s2, 2
+ %a1 = and i1 %c1, %c2
+ br i1 %a1, label %incompatible, label %okay
+
+okay:
+ ret void
+
+incompatible:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll b/llvm/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
new file mode 100644
index 00000000000..7c50141421d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine
+
+define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x, i32 %start_y, i32 %end_y) {
+ br label %1
+
+; <label>:1 ; preds = %4, %0
+ %2 = icmp slt i32 0, %end_y ; <i1> [#uses=1]
+ br i1 %2, label %4, label %3
+
+; <label>:3 ; preds = %1
+ ret void
+
+; <label>:4 ; preds = %6, %1
+ %5 = icmp slt i32 0, %end_x ; <i1> [#uses=1]
+ br i1 %5, label %6, label %1
+
+; <label>:6 ; preds = %4
+ %7 = srem <2 x i32> zeroinitializer, zeroinitializer ; <<2 x i32>> [#uses=1]
+ %8 = extractelement <2 x i32> %7, i32 1 ; <i32> [#uses=1]
+ %9 = select i1 false, i32 0, i32 %8 ; <i32> [#uses=1]
+ %10 = insertelement <2 x i32> zeroinitializer, i32 %9, i32 1 ; <<2 x i32>> [#uses=1]
+ %11 = extractelement <2 x i32> %10, i32 1 ; <i32> [#uses=1]
+ %12 = insertelement <4 x i32> zeroinitializer, i32 %11, i32 3 ; <<4 x i32>> [#uses=1]
+ %13 = sitofp <4 x i32> %12 to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %13, <4 x float>* null
+ br label %4
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll b/llvm/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
new file mode 100644
index 00000000000..cf29f8d9bf2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
+; PR2697
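+; x sdiv 65536 always lies in [-32768, 32767], which is never less than -65536,
+; so the compare folds to false.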
+
+define i1 @x(i32 %x) nounwind {
+ %div = sdiv i32 %x, 65536 ; <i32> [#uses=1]
+ %cmp = icmp slt i32 %div, -65536
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll b/llvm/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
new file mode 100644
index 00000000000..d70d05293e8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine
+; PR2940
+
+define i32 @tstid() {
+ %var0 = inttoptr i32 1 to i8* ; <i8*> [#uses=1]
+ %var2 = ptrtoint i8* %var0 to i32 ; <i32> [#uses=1]
+ ret i32 %var2
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll b/llvm/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
new file mode 100644
index 00000000000..679cc5f73d7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | grep "ret i1 true"
+; PR2993
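+; x srem -1 is 0 whenever the operation is defined, so the equality compare folds to true.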
+
+define i1 @foo(i32 %x) {
+ %1 = srem i32 %x, -1
+ %2 = icmp eq i32 %1, 0
+ ret i1 %2
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/llvm/test/Transforms/InstCombine/2008-11-08-FCmp.ll
new file mode 100644
index 00000000000..f1af7ce2fb1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-11-08-FCmp.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR3021
+
+; When inst combining an FCMP with the LHS coming from a uitofp instruction, we
+; can't lower it to signed ICMP instructions.
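+; A uitofp'd i32 is always non-negative and exactly representable as a double, so
+; these comparisons fold to unsigned integer compares or to constants.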
+
+; CHECK-LABEL: @test1(
+define i1 @test1(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp ole double %1, 0.000000e+00
+; CHECK: icmp eq i32 %val, 0
+ ret i1 %2
+}
+
+; CHECK-LABEL: @test2(
+define i1 @test2(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp olt double %1, 0.000000e+00
+ ret i1 %2
+; CHECK: ret i1 false
+}
+
+; CHECK-LABEL: @test3(
+define i1 @test3(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp oge double %1, 0.000000e+00
+ ret i1 %2
+; CHECK: ret i1 true
+}
+
+; CHECK-LABEL: @test4(
+define i1 @test4(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp ogt double %1, 0.000000e+00
+; CHECK: icmp ne i32 %val, 0
+ ret i1 %2
+}
+
+; CHECK-LABEL: @test5(
+define i1 @test5(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp ogt double %1, -4.400000e+00
+ ret i1 %2
+; CHECK: ret i1 true
+}
+
+; CHECK-LABEL: @test6(
+define i1 @test6(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp olt double %1, -4.400000e+00
+ ret i1 %2
+; CHECK: ret i1 false
+}
+
+; Check that optimizing unsigned >= comparisons correctly distinguishes
+; positive and negative constants. <rdar://problem/12029145>
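+; For an integer value, 'oge 3.2' is equivalent to 'val >= 4', i.e. 'ugt i32 %val, 3'.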
+; CHECK-LABEL: @test7(
+define i1 @test7(i32 %val) {
+ %1 = uitofp i32 %val to double
+ %2 = fcmp oge double %1, 3.200000e+00
+ ret i1 %2
+; CHECK: icmp ugt i32 %val, 3
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-11-27-IDivVector.ll b/llvm/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
new file mode 100644
index 00000000000..318a80cbc2a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | not grep div
+
+define <2 x i8> @f(<2 x i8> %x) {
+ %A = udiv <2 x i8> %x, <i8 1, i8 1>
+ ret <2 x i8> %A
+}
+
+define <2 x i8> @g(<2 x i8> %x) {
+ %A = sdiv <2 x i8> %x, <i8 1, i8 1>
+ ret <2 x i8> %A
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll b/llvm/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
new file mode 100644
index 00000000000..d8c53fac49e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | not grep mul
+
+define <2 x i8> @f(<2 x i8> %x) {
+ %A = mul <2 x i8> %x, <i8 1, i8 1>
+ ret <2 x i8> %A
+}
+
+define <2 x i8> @g(<2 x i8> %x) {
+ %A = mul <2 x i8> %x, <i8 -1, i8 -1>
+ ret <2 x i8> %A
+}
diff --git a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
new file mode 100644
index 00000000000..75bd5e0175f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -S | grep "i8 2, i8 2"
+; PR2756
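+; The sign of an srem divisor does not affect the result, so the divisor should be
+; canonicalized to <i8 2, i8 2>.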
+
+define <2 x i8> @foo(<2 x i8> %x) {
+ %A = srem <2 x i8> %x, <i8 2, i8 -2>
+ ret <2 x i8> %A
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-05-i128-crash.ll b/llvm/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
new file mode 100644
index 00000000000..d355e0aff8a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; PR3235
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define hidden i128 @"\01_gfortrani_max_value"(i32 %length, i32 %signed_flag) nounwind {
+entry:
+ switch i32 %length, label %bb13 [
+ i32 1, label %bb17
+ i32 4, label %bb9
+ i32 8, label %bb5
+ ]
+
+bb5: ; preds = %entry
+ %0 = icmp eq i32 %signed_flag, 0 ; <i1> [#uses=1]
+ %iftmp.28.0 = select i1 %0, i128 18446744073709551615, i128 9223372036854775807 ; <i128> [#uses=1]
+ ret i128 %iftmp.28.0
+
+bb9: ; preds = %entry
+ ret i128 0
+
+bb13: ; preds = %entry
+ ret i128 0
+
+bb17: ; preds = %entry
+ ret i128 0
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
new file mode 100644
index 00000000000..9994b588b21
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S > %t
+; RUN: grep ", align 4" %t | count 3
+; RUN: grep ", align 8" %t | count 3
+; rdar://6480438
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+ %struct.Key = type { { i32, i32 } }
+ %struct.anon = type <{ i8, [3 x i8], i32 }>
+
+define i32 @bar(i64 %key_token2) nounwind {
+entry:
+ %iospec = alloca %struct.Key ; <%struct.Key*> [#uses=3]
+ %ret = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %0 = getelementptr %struct.Key, %struct.Key* %iospec, i32 0, i32 0 ; <{ i32, i32 }*> [#uses=2]
+ %1 = getelementptr { i32, i32 }, { i32, i32 }* %0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %1, align 4
+ %2 = getelementptr { i32, i32 }, { i32, i32 }* %0, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 0, i32* %2, align 4
+ %3 = getelementptr %struct.Key, %struct.Key* %iospec, i32 0, i32 0 ; <{ i32, i32 }*> [#uses=1]
+ %4 = bitcast { i32, i32 }* %3 to i64* ; <i64*> [#uses=1]
+ store i64 %key_token2, i64* %4, align 4
+ %5 = call i32 (...) @foo(%struct.Key* byval align 4 %iospec, i32* %ret) nounwind ; <i32> [#uses=0]
+ %6 = load i32, i32* %ret, align 4 ; <i32> [#uses=1]
+ ret i32 %6
+}
+
+declare i32 @foo(...)
diff --git a/llvm/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/llvm/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
new file mode 100644
index 00000000000..e3543116a66
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | grep "store.*addrspace(1)"
+; PR3335
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @test(i32* %P) nounwind {
+entry:
+ %Q = addrspacecast i32* %P to i32 addrspace(1)*
+ store i32 0, i32 addrspace(1)* %Q, align 4
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
new file mode 100644
index 00000000000..b9aa0a25497
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
@@ -0,0 +1,315 @@
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x7FF8000000000000 | count 12
+; RUN: opt < %s -simplifycfg -instcombine -S | grep "0\.0" | count 3
+; RUN: opt < %s -simplifycfg -instcombine -S | grep "3\.5" | count 1
+;
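+; frem yields NaN when either operand is NaN, the dividend is infinite, or the
+; divisor is zero (the 12 NaN results below); 0 frem inf, 0 frem 3.5, and
+; 3.5 frem 3.5 give the three 0.0 results, and 3.5 frem inf gives the single 3.5.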
+
+; ModuleID = 'apf.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+@"\01LC" = internal constant [4 x i8] c"%f\0A\00" ; <[4 x i8]*> [#uses=1]
+
+define void @foo1() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF0000000000000, float* %x, align 4
+ store float 0x7FF8000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
+
+define void @foo2() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF0000000000000, float* %x, align 4
+ store float 0.000000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF0000000000000, float* %x, align 4
+ store float 3.500000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo4() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF0000000000000, float* %x, align 4
+ store float 0x7FF0000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo5() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF8000000000000, float* %x, align 4
+ store float 0x7FF0000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo6() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF8000000000000, float* %x, align 4
+ store float 0.000000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo7() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF8000000000000, float* %x, align 4
+ store float 3.500000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo8() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0x7FF8000000000000, float* %x, align 4
+ store float 0x7FF8000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo9() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0.000000e+00, float* %x, align 4
+ store float 0x7FF8000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo10() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0.000000e+00, float* %x, align 4
+ store float 0x7FF0000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo11() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0.000000e+00, float* %x, align 4
+ store float 0.000000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo12() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 0.000000e+00, float* %x, align 4
+ store float 3.500000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo13() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 3.500000e+00, float* %x, align 4
+ store float 0x7FF8000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo14() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 3.500000e+00, float* %x, align 4
+ store float 0x7FF0000000000000, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo15() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 3.500000e+00, float* %x, align 4
+ store float 0.000000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define void @foo16() nounwind {
+entry:
+ %y = alloca float ; <float*> [#uses=2]
+ %x = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store float 3.500000e+00, float* %x, align 4
+ store float 3.500000e+00, float* %y, align 4
+ %0 = load float, float* %y, align 4 ; <float> [#uses=1]
+ %1 = fpext float %0 to double ; <double> [#uses=1]
+ %2 = load float, float* %x, align 4 ; <float> [#uses=1]
+ %3 = fpext float %2 to double ; <double> [#uses=1]
+ %4 = frem double %3, %1 ; <double> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
new file mode 100644
index 00000000000..5adcb6bfa07
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x3FB99999A0000000 | count 2
+; RUN: opt < %s -simplifycfg -instcombine -S | grep 0xBFB99999A0000000 | count 2
+; Check constant folding for 'frem'. PR3316.
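+; A folded frem result takes the sign of the dividend: tests 1 and 3 produce +0.1
+; (0x3FB99999A0000000) and tests 2 and 4 produce -0.1 (0xBFB99999A0000000), giving
+; the two counts of 2 above.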
+
+; ModuleID = 'tt.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define float @test1() nounwind {
+entry:
+ %retval = alloca float ; <float*> [#uses=2]
+ %0 = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %1 = frem double 1.000000e-01, 1.000000e+00 ; <double> [#uses=1]
+ %2 = fptrunc double %1 to float ; <float> [#uses=1]
+ store float %2, float* %0, align 4
+ %3 = load float, float* %0, align 4 ; <float> [#uses=1]
+ store float %3, float* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load float, float* %retval ; <float> [#uses=1]
+ ret float %retval1
+}
+
+define float @test2() nounwind {
+entry:
+ %retval = alloca float ; <float*> [#uses=2]
+ %0 = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %1 = frem double -1.000000e-01, 1.000000e+00 ; <double> [#uses=1]
+ %2 = fptrunc double %1 to float ; <float> [#uses=1]
+ store float %2, float* %0, align 4
+ %3 = load float, float* %0, align 4 ; <float> [#uses=1]
+ store float %3, float* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load float, float* %retval ; <float> [#uses=1]
+ ret float %retval1
+}
+
+define float @test3() nounwind {
+entry:
+ %retval = alloca float ; <float*> [#uses=2]
+ %0 = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %1 = frem double 1.000000e-01, -1.000000e+00 ; <double> [#uses=1]
+ %2 = fptrunc double %1 to float ; <float> [#uses=1]
+ store float %2, float* %0, align 4
+ %3 = load float, float* %0, align 4 ; <float> [#uses=1]
+ store float %3, float* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load float, float* %retval ; <float> [#uses=1]
+ ret float %retval1
+}
+
+define float @test4() nounwind {
+entry:
+ %retval = alloca float ; <float*> [#uses=2]
+ %0 = alloca float ; <float*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %1 = frem double -1.000000e-01, -1.000000e+00 ; <double> [#uses=1]
+ %2 = fptrunc double %1 to float ; <float> [#uses=1]
+ store float %2, float* %0, align 4
+ %3 = load float, float* %0, align 4 ; <float> [#uses=1]
+ store float %3, float* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load float, float* %retval ; <float> [#uses=1]
+ ret float %retval1
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll b/llvm/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
new file mode 100644
index 00000000000..3f3535b363d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine
+; PR3381
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+ %struct.atomic_t = type { i32 }
+ %struct.inode = type { i32, %struct.mutex }
+ %struct.list_head = type { %struct.list_head*, %struct.list_head* }
+ %struct.lock_class_key = type { }
+ %struct.mutex = type { %struct.atomic_t, %struct.rwlock_t, %struct.list_head }
+ %struct.rwlock_t = type { %struct.lock_class_key }
+
+define void @handle_event(%struct.inode* %bar) nounwind {
+entry:
+ %0 = getelementptr %struct.inode, %struct.inode* %bar, i64 -1, i32 1, i32 1 ; <%struct.rwlock_t*> [#uses=1]
+ %1 = bitcast %struct.rwlock_t* %0 to i32* ; <i32*> [#uses=1]
+ store i32 1, i32* %1, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-31-InfIterate.ll b/llvm/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
new file mode 100644
index 00000000000..815c1a91936
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; PR3452
+define i128 @test(i64 %A, i64 %B, i1 %C, i128 %Z, i128 %Y, i64* %P, i64* %Q) {
+entry:
+ %tmp2 = trunc i128 %Z to i64
+ %tmp4 = trunc i128 %Y to i64
+ store i64 %tmp2, i64* %P
+ store i64 %tmp4, i64* %Q
+ %x = sub i64 %tmp2, %tmp4
+ %c = sub i64 %tmp2, %tmp4
+ %tmp137 = zext i1 %C to i64
+ %tmp138 = sub i64 %c, %tmp137
+ br label %T
+
+T:
+ %G = phi i64 [%tmp138, %entry], [%tmp2, %Fal]
+ %F = zext i64 %G to i128
+ ret i128 %F
+
+Fal:
+ br label %T
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-01-31-Pressure.ll b/llvm/test/Transforms/InstCombine/2009-01-31-Pressure.ll
new file mode 100644
index 00000000000..666b02e8ed0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-01-31-Pressure.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | grep "%B = add i8 %b, %x"
+; PR2698
+
+declare void @use1(i1)
+declare void @use8(i8)
+
+define void @test1(i8 %a, i8 %b, i8 %x) {
+ %A = add i8 %a, %x
+ %B = add i8 %b, %x
+ %C = icmp eq i8 %A, %B
+ call void @use1(i1 %C)
+ ret void
+}
+
+define void @test2(i8 %a, i8 %b, i8 %x) {
+ %A = add i8 %a, %x
+ %B = add i8 %b, %x
+ %C = icmp eq i8 %A, %B
+ call void @use1(i1 %C)
+ call void @use8(i8 %A)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll b/llvm/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
new file mode 100644
index 00000000000..bc6a2045fa0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine
+; PR3468
+
+define x86_fp80 @cast() {
+ %tmp = bitcast i80 0 to x86_fp80 ; <x86_fp80> [#uses=1]
+ ret x86_fp80 %tmp
+}
+
+define i80 @invcast() {
+ %tmp = bitcast x86_fp80 0xK00000000000000000000 to i80 ; <i80> [#uses=1]
+ ret i80 %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll b/llvm/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
new file mode 100644
index 00000000000..b66495d9cba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -inline -instcombine -functionattrs | llvm-dis
+;
+; Check that nocapture attributes are added when run after an SCC pass.
+; PR3520
+
+define i32 @use(i8* %x) nounwind readonly {
+; CHECK: @use(i8* nocapture %x)
+ %1 = tail call i64 @strlen(i8* %x) nounwind readonly
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+declare i64 @strlen(i8*) nounwind readonly
+; CHECK: declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
new file mode 100644
index 00000000000..d8c8e1e0202
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -0,0 +1,279 @@
+; RUN: opt < %s -instcombine -sroa -S | not grep " = alloca"
+; rdar://6417724
+; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
+%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
+%"struct.std::forward_iterator_tag" = type <{ i8 }>
+%"struct.std::input_iterator_tag" = type <{ i8 }>
+%"struct.std::random_access_iterator_tag" = type <{ i8 }>
+%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+
+define i32* @_Z3fooRSt6vectorIiSaIiEE(%"struct.std::vector<int,std::allocator<int> >"* %X) {
+entry:
+ %0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8
+ %1 = alloca %"struct.std::bidirectional_iterator_tag"
+ %__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %2 = alloca %"struct.std::bidirectional_iterator_tag"
+ %3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %4 = alloca i32
+ %"alloca point" = bitcast i32 0 to i32
+ store i32 42, i32* %4, align 4
+ %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+ %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >", %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0
+ %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl", %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1
+ %8 = load i32*, i32** %7, align 4
+ %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+ store i32* %8, i32** %9, align 4
+ %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+ %11 = load i32*, i32** %10, align 4
+ %tmp2.i = ptrtoint i32* %11 to i32
+ %tmp1.i = inttoptr i32 %tmp2.i to i32*
+ %tmp3 = ptrtoint i32* %tmp1.i to i32
+ %tmp2 = inttoptr i32 %tmp3 to i32*
+ %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+ %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >", %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0
+ %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl", %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0
+ %15 = load i32*, i32** %14, align 4
+ %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+ store i32* %15, i32** %16, align 4
+ %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+ %18 = load i32*, i32** %17, align 4
+ %tmp2.i17 = ptrtoint i32* %18 to i32
+ %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*
+ %tmp8 = ptrtoint i32* %tmp1.i18 to i32
+ %tmp6 = inttoptr i32 %tmp8 to i32*
+ %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+ store i32* %tmp6, i32** %19
+ %20 = getelementptr %"struct.std::bidirectional_iterator_tag", %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0
+ %21 = load i8, i8* %20, align 1
+ %22 = or i8 %21, 0
+ %23 = or i8 %22, 0
+ %24 = or i8 %23, 0
+ %25 = getelementptr %"struct.std::bidirectional_iterator_tag", %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0
+ store i8 0, i8* %25, align 1
+ %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+ %val.i = load i32*, i32** %elt.i
+ %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
+ %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i1 false)
+ %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %val.i, i32** %26
+ %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ store i32* %tmp2, i32** %27
+ %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %29 = load i32*, i32** %28, align 4
+ %30 = ptrtoint i32* %29 to i32
+ %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %32 = load i32*, i32** %31, align 4
+ %33 = ptrtoint i32* %32 to i32
+ %34 = sub i32 %30, %33
+ %35 = ashr i32 %34, 2
+ %36 = ashr i32 %35, 2
+ br label %bb12.i.i
+
+bb.i.i: ; preds = %bb12.i.i
+ %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %38 = load i32*, i32** %37, align 4
+ %39 = load i32, i32* %38, align 4
+ %40 = load i32, i32* %4, align 4
+ %41 = icmp eq i32 %39, %40
+ %42 = zext i1 %41 to i8
+ %toBool.i.i = icmp ne i8 %42, 0
+ br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
+
+bb1.i.i: ; preds = %bb.i.i
+ %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %44 = load i32*, i32** %43, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb2.i.i: ; preds = %bb.i.i
+ %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %46 = load i32*, i32** %45, align 4
+ %47 = getelementptr i32, i32* %46, i64 1
+ %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %47, i32** %48, align 4
+ %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %50 = load i32*, i32** %49, align 4
+ %51 = load i32, i32* %50, align 4
+ %52 = load i32, i32* %4, align 4
+ %53 = icmp eq i32 %51, %52
+ %54 = zext i1 %53 to i8
+ %toBool3.i.i = icmp ne i8 %54, 0
+ br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
+
+bb4.i.i: ; preds = %bb2.i.i
+ %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %56 = load i32*, i32** %55, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb5.i.i: ; preds = %bb2.i.i
+ %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %58 = load i32*, i32** %57, align 4
+ %59 = getelementptr i32, i32* %58, i64 1
+ %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %59, i32** %60, align 4
+ %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %62 = load i32*, i32** %61, align 4
+ %63 = load i32, i32* %62, align 4
+ %64 = load i32, i32* %4, align 4
+ %65 = icmp eq i32 %63, %64
+ %66 = zext i1 %65 to i8
+ %toBool6.i.i = icmp ne i8 %66, 0
+ br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
+
+bb7.i.i: ; preds = %bb5.i.i
+ %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %68 = load i32*, i32** %67, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb8.i.i: ; preds = %bb5.i.i
+ %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %70 = load i32*, i32** %69, align 4
+ %71 = getelementptr i32, i32* %70, i64 1
+ %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %71, i32** %72, align 4
+ %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %74 = load i32*, i32** %73, align 4
+ %75 = load i32, i32* %74, align 4
+ %76 = load i32, i32* %4, align 4
+ %77 = icmp eq i32 %75, %76
+ %78 = zext i1 %77 to i8
+ %toBool9.i.i = icmp ne i8 %78, 0
+ br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
+
+bb10.i.i: ; preds = %bb8.i.i
+ %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %80 = load i32*, i32** %79, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb11.i.i: ; preds = %bb8.i.i
+ %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %82 = load i32*, i32** %81, align 4
+ %83 = getelementptr i32, i32* %82, i64 1
+ %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %83, i32** %84, align 4
+ %85 = sub i32 %__trip_count.0.i.i, 1
+ br label %bb12.i.i
+
+bb12.i.i: ; preds = %bb11.i.i, %entry
+ %__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]
+ %86 = icmp sgt i32 %__trip_count.0.i.i, 0
+ br i1 %86, label %bb.i.i, label %bb13.i.i
+
+bb13.i.i: ; preds = %bb12.i.i
+ %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %88 = load i32*, i32** %87, align 4
+ %89 = ptrtoint i32* %88 to i32
+ %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %91 = load i32*, i32** %90, align 4
+ %92 = ptrtoint i32* %91 to i32
+ %93 = sub i32 %89, %92
+ %94 = ashr i32 %93, 2
+ switch i32 %94, label %bb26.i.i [
+ i32 1, label %bb22.i.i
+ i32 2, label %bb18.i.i
+ i32 3, label %bb14.i.i
+ ]
+
+bb14.i.i: ; preds = %bb13.i.i
+ %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %96 = load i32*, i32** %95, align 4
+ %97 = load i32, i32* %96, align 4
+ %98 = load i32, i32* %4, align 4
+ %99 = icmp eq i32 %97, %98
+ %100 = zext i1 %99 to i8
+ %toBool15.i.i = icmp ne i8 %100, 0
+ br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
+
+bb16.i.i: ; preds = %bb14.i.i
+ %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %102 = load i32*, i32** %101, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb17.i.i: ; preds = %bb14.i.i
+ %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %104 = load i32*, i32** %103, align 4
+ %105 = getelementptr i32, i32* %104, i64 1
+ %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %105, i32** %106, align 4
+ br label %bb18.i.i
+
+bb18.i.i: ; preds = %bb17.i.i, %bb13.i.i
+ %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %108 = load i32*, i32** %107, align 4
+ %109 = load i32, i32* %108, align 4
+ %110 = load i32, i32* %4, align 4
+ %111 = icmp eq i32 %109, %110
+ %112 = zext i1 %111 to i8
+ %toBool19.i.i = icmp ne i8 %112, 0
+ br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
+
+bb20.i.i: ; preds = %bb18.i.i
+ %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %114 = load i32*, i32** %113, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb21.i.i: ; preds = %bb18.i.i
+ %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %116 = load i32*, i32** %115, align 4
+ %117 = getelementptr i32, i32* %116, i64 1
+ %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %117, i32** %118, align 4
+ br label %bb22.i.i
+
+bb22.i.i: ; preds = %bb21.i.i, %bb13.i.i
+ %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %120 = load i32*, i32** %119, align 4
+ %121 = load i32, i32* %120, align 4
+ %122 = load i32, i32* %4, align 4
+ %123 = icmp eq i32 %121, %122
+ %124 = zext i1 %123 to i8
+ %toBool23.i.i = icmp ne i8 %124, 0
+ br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
+
+bb24.i.i: ; preds = %bb22.i.i
+ %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %126 = load i32*, i32** %125, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+bb25.i.i: ; preds = %bb22.i.i
+ %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %128 = load i32*, i32** %127, align 4
+ %129 = getelementptr i32, i32* %128, i64 1
+ %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %129, i32** %130, align 4
+ br label %bb26.i.i
+
+bb26.i.i: ; preds = %bb25.i.i, %bb13.i.i
+ %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %132 = load i32*, i32** %131, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+
+_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
+ %.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]
+ %tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32
+ %tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*
+ %tmp4.i = ptrtoint i32* %tmp1.i.i to i32
+ %tmp3.i = inttoptr i32 %tmp4.i to i32*
+ %tmp8.i = ptrtoint i32* %tmp3.i to i32
+ %tmp6.i = inttoptr i32 %tmp8.i to i32*
+ %tmp12 = ptrtoint i32* %tmp6.i to i32
+ %tmp10 = inttoptr i32 %tmp12 to i32*
+ %tmp16 = ptrtoint i32* %tmp10 to i32
+ br label %return
+
+return: ; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+ %tmp14 = inttoptr i32 %tmp16 to i32*
+ ret i32* %tmp14
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll b/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
new file mode 100644
index 00000000000..90ec6d540e9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | grep "ret i32 3679669"
+; PR3595
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+@.str1 = internal constant [4 x i8] c"\B5%8\00"
+
+define i32 @test() {
+ %rhsv = load i32, i32* bitcast ([4 x i8]* @.str1 to i32*), align 1
+ ret i32 %rhsv
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll b/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
new file mode 100644
index 00000000000..ef1734ba7d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; PR3667
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @_ada_c32001b(i32 %tmp5) {
+entry:
+ %max289 = select i1 false, i32 %tmp5, i32 0 ; <i32> [#uses=1]
+ %tmp6 = mul i32 %max289, 4 ; <i32> [#uses=1]
+ %tmp7 = alloca i8, i32 0 ; <i8*> [#uses=1]
+ %tmp8 = bitcast i8* %tmp7 to [0 x [0 x i32]]* ; <[0 x [0 x i32]]*> [#uses=1]
+ %tmp11 = load i32, i32* null, align 1 ; <i32> [#uses=1]
+ %tmp12 = icmp eq i32 %tmp11, 3 ; <i1> [#uses=1]
+ %tmp13 = zext i1 %tmp12 to i8 ; <i8> [#uses=1]
+ %tmp14 = ashr i32 %tmp6, 2 ; <i32> [#uses=1]
+ %tmp15 = bitcast [0 x [0 x i32]]* %tmp8 to i8* ; <i8*> [#uses=1]
+ %tmp16 = mul i32 %tmp14, 4 ; <i32> [#uses=1]
+ %tmp17 = mul i32 1, %tmp16 ; <i32> [#uses=1]
+ %tmp18 = getelementptr i8, i8* %tmp15, i32 %tmp17 ; <i8*> [#uses=1]
+ %tmp19 = bitcast i8* %tmp18 to [0 x i32]* ; <[0 x i32]*> [#uses=1]
+ %tmp20 = bitcast [0 x i32]* %tmp19 to i32* ; <i32*> [#uses=1]
+ %tmp21 = getelementptr i32, i32* %tmp20, i32 0 ; <i32*> [#uses=1]
+ %tmp22 = load i32, i32* %tmp21, align 1 ; <i32> [#uses=1]
+ %tmp23 = icmp eq i32 %tmp22, 4 ; <i1> [#uses=1]
+ %tmp24 = zext i1 %tmp23 to i8 ; <i8> [#uses=1]
+ %toBool709 = icmp ne i8 %tmp13, 0 ; <i1> [#uses=1]
+ %toBool710 = icmp ne i8 %tmp24, 0 ; <i1> [#uses=1]
+ %tmp25 = and i1 %toBool709, %toBool710 ; <i1> [#uses=1]
+ %tmp26 = zext i1 %tmp25 to i8 ; <i8> [#uses=1]
+ %toBool711 = icmp ne i8 %tmp26, 0 ; <i1> [#uses=1]
+ br i1 %toBool711, label %a, label %b
+
+a: ; preds = %entry
+ ret void
+
+b: ; preds = %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll b/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
new file mode 100644
index 00000000000..3847abd30c8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; PR3826
+
+define void @0(<4 x i16>*, <4 x i16>*) {
+ %3 = alloca <4 x i16>* ; <<4 x i16>**> [#uses=1]
+ %4 = load <4 x i16>, <4 x i16>* null, align 1 ; <<4 x i16>> [#uses=1]
+ %5 = ashr <4 x i16> %4, <i16 5, i16 5, i16 5, i16 5> ; <<4 x i16>> [#uses=1]
+ %6 = load <4 x i16>*, <4 x i16>** %3 ; <<4 x i16>*> [#uses=1]
+ store <4 x i16> %5, <4 x i16>* %6, align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-03-24-InfLoop.ll b/llvm/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
new file mode 100644
index 00000000000..4ce04a1eb54
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
@@ -0,0 +1,9 @@
+; PR3874
+; RUN: opt < %s -instcombine | llvm-dis
+ define i1 @test(i32 %x) {
+ %A = lshr i32 3968, %x
+ %B = and i32 %A, 1
+ %C = icmp eq i32 %B, 0
+ ret i1 %C
+ }
+
diff --git a/llvm/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll b/llvm/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
new file mode 100644
index 00000000000..b79edf66b26
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -S | grep "mul i64"
+; rdar://6762288
+
+; Instcombine should not promote the mul to i96 because it is definitely
+; not a legal type for the target, and we don't want a libcall.
+
+define i96 @test(i96 %a.4, i96 %b.2) {
+ %tmp1086 = trunc i96 %a.4 to i64 ; <i64> [#uses=1]
+ %tmp836 = trunc i96 %b.2 to i64 ; <i64> [#uses=1]
+ %mul185 = mul i64 %tmp1086, %tmp836 ; <i64> [#uses=1]
+ %tmp544 = zext i64 %mul185 to i96 ; <i96> [#uses=1]
+ ret i96 %tmp544
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll b/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
new file mode 100644
index 00000000000..ced317c4d43
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | not grep cmp
+; rdar://6903175
+
+define i1 @f0(i32 *%a) nounwind {
+ %b = load i32, i32* %a, align 4
+ %c = uitofp i32 %b to double
+ %d = fcmp ogt double %c, 0x41EFFFFFFFE00000
+ ret i1 %d
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/llvm/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
new file mode 100644
index 00000000000..468c1cd8bbc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -S | grep "store i32 0,"
+; PR4366
+
+define void @a() {
+ store i32 0, i32 addrspace(1)* null
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll b/llvm/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
new file mode 100644
index 00000000000..6beedf83cd6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep srem
+; PR3439
+
+define i32 @a(i32 %x) nounwind {
+entry:
+ %rem = srem i32 %x, 2
+ %and = and i32 %rem, 2
+ ret i32 %and
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll b/llvm/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
new file mode 100644
index 00000000000..41940fe885e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine | llvm-dis
+; PR4495
+
+define i32 @test(i64 %test) {
+entry:
+ %0 = bitcast <4 x i32> undef to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %t12 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> ; <<16 x i8>> [#uses=1]
+ %t11 = bitcast <16 x i8> %t12 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %t9 = extractelement <2 x i64> %t11, i32 0 ; <i64> [#uses=1]
+ %t10 = bitcast i64 %t9 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %t7 = bitcast i64 %test to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %t6 = xor <2 x i32> %t10, %t7 ; <<2 x i32>> [#uses=1]
+ %t1 = extractelement <2 x i32> %t6, i32 0 ; <i32> [#uses=1]
+ ret i32 %t1
+}
diff --git a/llvm/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll b/llvm/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll
new file mode 100644
index 00000000000..c438ca5fd17
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str254 = internal constant [2 x i8] c".\00"
+@.str557 = internal constant [3 x i8] c"::\00"
+
+define i8* @demangle_qualified(i32 %isfuncname) nounwind {
+entry:
+ %tobool272 = icmp ne i32 %isfuncname, 0
+ %cond276 = select i1 %tobool272, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str254, i32 0, i32 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str557, i32 0, i32 0) ; <i8*> [#uses=4]
+ %cmp.i504 = icmp eq i8* %cond276, null
+ %rval = getelementptr i8, i8* %cond276, i1 %cmp.i504
+ ret i8* %rval
+}
+
+; CHECK: %cond276 = select i1
+; CHECK: ret i8* %cond276
diff --git a/llvm/test/Transforms/InstCombine/2010-01-28-NegativeSRem.ll b/llvm/test/Transforms/InstCombine/2010-01-28-NegativeSRem.ll
new file mode 100644
index 00000000000..4ab9bf0c3f5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-01-28-NegativeSRem.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR6165
+
+define i32 @f() {
+entry:
+ br label %BB1
+
+BB1: ; preds = %BB1, %entry
+; CHECK: BB1:
+ %x = phi i32 [ -29, %entry ], [ 0, %BB1 ] ; <i32> [#uses=2]
+ %rem = srem i32 %x, 2 ; <i32> [#uses=1]
+ %t = icmp eq i32 %rem, -1 ; <i1> [#uses=1]
+ br i1 %t, label %BB2, label %BB1
+; CHECK-NOT: br i1 false
+
+BB2: ; preds = %BB1
+; CHECK: BB2:
+ ret i32 %x
+}
diff --git a/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
new file mode 100644
index 00000000000..ad0fe5a2178
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -0,0 +1,32 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-unknown-linux-gnu"
+
+@g_92 = common global [2 x i32*] zeroinitializer, align 4 ; <[2 x i32*]*> [#uses=1]
+@g_177 = constant i32** bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i32*]* @g_92 to i8*), i64 4) to i32**), align 4 ; <i32***> [#uses=1]
+
+define i1 @PR6486() nounwind {
+; CHECK-LABEL: @PR6486(
+ %tmp = load i32**, i32*** @g_177 ; <i32**> [#uses=1]
+ %cmp = icmp ne i32** null, %tmp ; <i1> [#uses=1]
+ %conv = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %cmp1 = icmp sle i32 0, %conv ; <i1> [#uses=1]
+ ret i1 %cmp1
+; CHECK: ret i1 true
+}
+
+@d = common global i32 0, align 4
+@a = common global [1 x i32] zeroinitializer, align 4
+
+define i1 @PR16462_1() nounwind {
+; CHECK-LABEL: @PR16462_1(
+ ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 65535)
+; CHECK: ret i1 false
+}
+
+define i1 @PR16462_2() nounwind {
+; CHECK-LABEL: @PR16462_2(
+ ret i1 icmp sgt (i32 sext (i16 trunc (i32 select (i1 icmp eq (i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0), i32* @d), i32 0, i32 1) to i16) to i32), i32 42)
+; CHECK: ret i1 false
+}
diff --git a/llvm/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll b/llvm/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
new file mode 100644
index 00000000000..51610698c24
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; PR7265
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.anon = type { i32, [4 x i8] }
+
+@.str = private constant [3 x i8] c"%s\00"
+
+define void @CopyEventArg(%union.anon* %ev) nounwind {
+entry:
+ %call = call i32 (i8*, i8*, ...) @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
+; CHECK: bitcast %union.anon* %ev to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+ ret void
+}
+
+declare i32 @sprintf(i8*, i8*, ...)
+
diff --git a/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
new file mode 100644
index 00000000000..7f2826071a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -0,0 +1,57 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; <rdar://problem/8606771>
+define i32 @main(i32 %argc) {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: [[TMP3151:%.*]] = trunc i32 %argc to i8
+; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[TMP3151]], 5
+; CHECK-NEXT: [[TMP4126:%.*]] = and i8 [[TMP1]], 64
+; CHECK-NEXT: [[TMP4127:%.*]] = xor i8 [[TMP4126]], 64
+; CHECK-NEXT: [[TMP4086:%.*]] = zext i8 [[TMP4127]] to i32
+; CHECK-NEXT: ret i32 [[TMP4086]]
+;
+ %tmp3151 = trunc i32 %argc to i8
+ %tmp3161 = or i8 %tmp3151, -17
+ %tmp3162 = and i8 %tmp3151, 122
+ %tmp3163 = xor i8 %tmp3162, -17
+ %tmp4114 = shl i8 %tmp3163, 6
+ %tmp4115 = xor i8 %tmp4114, %tmp3163
+ %tmp4120 = xor i8 %tmp3161, %tmp4115
+ %tmp4126 = lshr i8 %tmp4120, 7
+ %tmp4127 = mul i8 %tmp4126, 64
+ %tmp4086 = zext i8 %tmp4127 to i32
+ ret i32 %tmp4086
+}
+
+; rdar://8739316
+define i8 @foo(i8 %arg, i8 %arg1) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[TMP:%.*]] = shl i8 %arg, 7
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 %arg1, 84
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 %arg1, -118
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 %arg1, 33
+; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i8 40, [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP5]], 84
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[TMP]], [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i8 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = lshr i8 [[TMP8]], 7
+; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i8 [[TMP10]], 5
+; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[TMP11]], [[TMP9]]
+; CHECK-NEXT: ret i8 [[TMP12]]
+;
+ %tmp = shl i8 %arg, 7
+ %tmp2 = and i8 %arg1, 84
+ %tmp3 = and i8 %arg1, -118
+ %tmp4 = and i8 %arg1, 33
+ %tmp5 = sub i8 -88, %tmp2
+ %tmp6 = and i8 %tmp5, 84
+ %tmp7 = or i8 %tmp4, %tmp6
+ %tmp8 = xor i8 %tmp, %tmp3
+ %tmp9 = or i8 %tmp7, %tmp8
+ %tmp10 = lshr i8 %tmp8, 7
+ %tmp11 = shl i8 %tmp10, 5
+ %tmp12 = xor i8 %tmp11, %tmp9
+ ret i8 %tmp12
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll b/llvm/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
new file mode 100644
index 00000000000..798c726e566
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define {}* @foo({}* %x, i32 %n) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: getelementptr
+ %p = getelementptr {}, {}* %x, i32 %n
+ ret {}* %p
+}
+
+define i8* @bar(i64 %n, {{}, [0 x {[0 x i8]}]}* %p) {
+; CHECK-LABEL: @bar(
+ %g = getelementptr {{}, [0 x {[0 x i8]}]}, {{}, [0 x {[0 x i8]}]}* %p, i64 %n, i32 1, i64 %n, i32 0, i64 %n
+; CHECK: %p, i64 0, i32 1, i64 0, i32 0, i64 %n
+ ret i8* %g
+}
diff --git a/llvm/test/Transforms/InstCombine/2010-11-23-Distributed.ll b/llvm/test/Transforms/InstCombine/2010-11-23-Distributed.ll
new file mode 100644
index 00000000000..20bfed87798
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2010-11-23-Distributed.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: @foo(
+ %add = add nsw i32 %y, %x
+ %mul = mul nsw i32 %add, %y
+ %square = mul nsw i32 %y, %y
+ %res = sub i32 %mul, %square
+ ret i32 %res
+; CHECK-NEXT: mul i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i1 @bar(i64 %x, i64 %y) {
+; CHECK-LABEL: @bar(
+ %a = and i64 %y, %x
+; CHECK: and
+; CHECK-NOT: and
+ %not = xor i64 %a, -1
+ %b = and i64 %y, %not
+ %r = icmp eq i64 %b, 0
+ ret i1 %r
+; CHECK: ret i1
+}
diff --git a/llvm/test/Transforms/InstCombine/2011-02-14-InfLoop.ll b/llvm/test/Transforms/InstCombine/2011-02-14-InfLoop.ll
new file mode 100644
index 00000000000..6d8a7ddbe46
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-02-14-InfLoop.ll
@@ -0,0 +1,19 @@
+; This testcase causes an infinite loop in the instruction combiner,
+; because it changes a pattern and the original pattern is almost
+; identical to the newly-generated pattern.
+; RUN: opt < %s -instcombine -disable-output
+
+; PR9216
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @m_387(i8* noalias nocapture %A, i8* nocapture %B, <4 x i1> %C) nounwind {
+entry:
+ %movcsext20 = sext <4 x i1> %C to <4 x i32>
+ %tmp2389 = xor <4 x i32> %movcsext20, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %movcand25 = and <4 x i32> %tmp2389, <i32 undef, i32 undef, i32 undef, i32 -1>
+ %movcor26 = or <4 x i32> %movcand25, zeroinitializer
+ %L2 = bitcast <4 x i32> %movcor26 to <4 x float>
+ %L3 = shufflevector <4 x float> zeroinitializer, <4 x float> %L2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x float> %L3
+}
diff --git a/llvm/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll b/llvm/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll
new file mode 100644
index 00000000000..6a3e3e40e6d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR9346
+
+define i32 @test(i64 %x) nounwind {
+; CHECK: ret i32 0
+entry:
+ %or = or i64 %x, 4294967294
+ %conv = trunc i64 %or to i32
+ %rem.i = srem i32 %conv, -1
+ ret i32 %rem.i
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll b/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
new file mode 100644
index 00000000000..116c9713d89
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine
+; PR9579
+
+define <2 x i16> @entry(<2 x i16> %a) nounwind {
+entry:
+ %a.addr = alloca <2 x i16>, align 4
+ %.compoundliteral = alloca <2 x i16>, align 4
+ store <2 x i16> %a, <2 x i16>* %a.addr, align 4
+ %tmp = load <2 x i16>, <2 x i16>* %a.addr, align 4
+ store <2 x i16> zeroinitializer, <2 x i16>* %.compoundliteral
+ %tmp1 = load <2 x i16>, <2 x i16>* %.compoundliteral
+ %cmp = icmp uge <2 x i16> %tmp, %tmp1
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ ret <2 x i16> %sext
+}
diff --git a/llvm/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll b/llvm/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll
new file mode 100644
index 00000000000..69568705cfd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-05-13-InBoundsGEP.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; rdar://problem/9267970
+; Ideally this test would run on a 32-bit host.
+; InstCombine must not discard GEPs that might overflow at runtime (they aren't inbounds).
+
+define i32 @main(i32 %argc) {
+entry:
+ %tmp1 = add i32 %argc, -2
+ %tmp2 = add i32 %argc, 1879048192
+ %p = alloca i8
+; CHECK: getelementptr
+ %p1 = getelementptr i8, i8* %p, i32 %tmp1
+; CHECK: getelementptr
+ %p2 = getelementptr i8, i8* %p, i32 %tmp2
+ %cmp = icmp ult i8* %p1, %p2
+ br i1 %cmp, label %bbtrue, label %bbfalse
+bbtrue: ; preds = %entry
+ ret i32 -1
+bbfalse: ; preds = %entry
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll b/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
new file mode 100644
index 00000000000..a746ccdc48c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "x86_64-apple-macosx10.6.6"
+
+define zeroext i16 @foo1(i32 %on_off) {
+; CHECK-LABEL: @foo1(
+; CHECK-NEXT: [[ON_OFF_TR:%.*]] = trunc i32 %on_off to i16
+; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[ON_OFF_TR]], 1
+; CHECK-NEXT: [[CONV:%.*]] = add i16 [[TMP1]], -2
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+ %on_off.addr = alloca i32, align 4
+ %a = alloca i32, align 4
+ store i32 %on_off, i32* %on_off.addr, align 4
+ %tmp = load i32, i32* %on_off.addr, align 4
+ %sub = sub i32 1, %tmp
+ %mul = mul i32 %sub, -2
+ store i32 %mul, i32* %a, align 4
+ %tmp1 = load i32, i32* %a, align 4
+ %conv = trunc i32 %tmp1 to i16
+ ret i16 %conv
+}
+
+define zeroext i16 @foo2(i32 %on_off, i32 %q) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT: [[SUBA:%.*]] = sub i32 %on_off, %q
+; CHECK-NEXT: [[SUBA_TR:%.*]] = trunc i32 [[SUBA]] to i16
+; CHECK-NEXT: [[CONV:%.*]] = shl i16 [[SUBA_TR]], 2
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+ %on_off.addr = alloca i32, align 4
+ %q.addr = alloca i32, align 4
+ %a = alloca i32, align 4
+ store i32 %on_off, i32* %on_off.addr, align 4
+ store i32 %q, i32* %q.addr, align 4
+ %tmp = load i32, i32* %q.addr, align 4
+ %tmp1 = load i32, i32* %on_off.addr, align 4
+ %sub = sub i32 %tmp, %tmp1
+ %mul = mul i32 %sub, -4
+ store i32 %mul, i32* %a, align 4
+ %tmp2 = load i32, i32* %a, align 4
+ %conv = trunc i32 %tmp2 to i16
+ ret i16 %conv
+}
+
+define zeroext i16 @foo3(i32 %on_off) {
+; CHECK-LABEL: @foo3(
+; CHECK-NEXT: [[ON_OFF_TR:%.*]] = trunc i32 %on_off to i16
+; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[ON_OFF_TR]], 2
+; CHECK-NEXT: [[CONV:%.*]] = add i16 [[TMP1]], -28
+; CHECK-NEXT: ret i16 [[CONV]]
+;
+ %on_off.addr = alloca i32, align 4
+ %a = alloca i32, align 4
+ store i32 %on_off, i32* %on_off.addr, align 4
+ %tmp = load i32, i32* %on_off.addr, align 4
+ %sub = sub i32 7, %tmp
+ %mul = mul i32 %sub, -4
+ store i32 %mul, i32* %a, align 4
+ %tmp1 = load i32, i32* %a, align 4
+ %conv = trunc i32 %tmp1 to i16
+ ret i16 %conv
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
new file mode 100644
index 00000000000..15c11db37f5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+; CHECK-LABEL: define void @fu1(
+define void @fu1(i32 %parm) nounwind ssp {
+ %1 = alloca i32, align 4
+; CHECK: alloca double*
+ %ptr = alloca double*, align 4
+ store i32 %parm, i32* %1, align 4
+ store double* null, double** %ptr, align 4
+ %2 = load i32, i32* %1, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %10
+
+; <label>:4 ; preds = %0
+ %5 = load i32, i32* %1, align 4
+ %6 = shl nsw i32 %5, 3
+; With "nsw", the alloca and its bitcast can be fused:
+ %7 = add nsw i32 %6, 2048
+; CHECK: alloca double
+ %8 = alloca i8, i32 %7
+ %9 = bitcast i8* %8 to double*
+; CHECK-NEXT: store double*
+ store double* %9, double** %ptr, align 4
+ br label %10
+; <label>:10 ; preds = %4, %0
+ %11 = load double*, double** %ptr, align 4
+ call void @bar(double* %11)
+; CHECK: ret
+ ret void
+}
+
+declare void @bar(double*)
+
+; CHECK-LABEL: define void @fu2(
+define void @fu2(i32 %parm) nounwind ssp {
+ %1 = alloca i32, align 4
+ %ptr = alloca double*, align 4
+ store i32 %parm, i32* %1, align 4
+ store double* null, double** %ptr, align 4
+ %2 = load i32, i32* %1, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %10
+
+; <label>:4 ; preds = %0
+ %5 = load i32, i32* %1, align 4
+ %6 = mul nsw i32 %5, 8
+; Without "nsw", the alloca and its bitcast cannot be fused:
+ %7 = add i32 %6, 2048
+; CHECK: alloca i8
+ %8 = alloca i8, i32 %7
+; CHECK-NEXT: bitcast double**
+; CHECK-NEXT: store i8*
+ %9 = bitcast i8* %8 to double*
+ store double* %9, double** %ptr, align 4
+ br label %10
+
+; <label>:10 ; preds = %4, %0
+ %11 = load double*, double** %ptr, align 4
+ call void @bar(double* %11)
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2011-09-03-Trampoline.ll b/llvm/test/Transforms/InstCombine/2011-09-03-Trampoline.ll
new file mode 100644
index 00000000000..5765d318530
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-09-03-Trampoline.ll
@@ -0,0 +1,102 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @llvm.init.trampoline(i8*, i8*, i8*)
+declare i8* @llvm.adjust.trampoline(i8*)
+declare i32 @f(i8 * nest, i32)
+
+; Most common case
+define i32 @test0(i32 %n) !dbg !4 {
+ %alloca = alloca [10 x i8], align 16
+ %gep = getelementptr [10 x i8], [10 x i8]* %alloca, i32 0, i32 0
+ call void @llvm.init.trampoline(i8* %gep, i8* bitcast (i32 (i8*, i32)* @f to i8*),
+ i8* null)
+ %tramp = call i8* @llvm.adjust.trampoline(i8* %gep)
+ %function = bitcast i8* %tramp to i32(i32)*
+ %ret = call i32 %function(i32 %n), !dbg !10
+ ret i32 %ret
+
+; CHECK: define i32 @test0(i32 %n) !dbg !4 {
+; CHECK: %ret = call i32 @f(i8* nest null, i32 %n), !dbg !10
+}
+
+define i32 @test1(i32 %n, i8* %trampmem) {
+ call void @llvm.init.trampoline(i8* %trampmem,
+ i8* bitcast (i32 (i8*, i32)* @f to i8*),
+ i8* null)
+ %tramp = call i8* @llvm.adjust.trampoline(i8* %trampmem)
+ %function = bitcast i8* %tramp to i32(i32)*
+ %ret = call i32 %function(i32 %n)
+ ret i32 %ret
+; CHECK: define i32 @test1(i32 %n, i8* %trampmem) {
+; CHECK: %ret = call i32 @f(i8* nest null, i32 %n)
+}
+
+define i32 @test2(i32 %n, i8* %trampmem) {
+ %tramp = call i8* @llvm.adjust.trampoline(i8* %trampmem)
+ %functiona = bitcast i8* %tramp to i32(i32)*
+ %ret = call i32 %functiona(i32 %n)
+ ret i32 %ret
+; CHECK: define i32 @test2(i32 %n, i8* %trampmem) {
+; CHECK: %ret = call i32 %functiona(i32 %n)
+}
+
+define i32 @test3(i32 %n, i8* %trampmem) {
+ call void @llvm.init.trampoline(i8* %trampmem,
+ i8* bitcast (i32 (i8*, i32)* @f to i8*),
+ i8* null)
+
+; CHECK: define i32 @test3(i32 %n, i8* %trampmem) {
+; CHECK: %ret0 = call i32 @f(i8* nest null, i32 %n)
+ %tramp0 = call i8* @llvm.adjust.trampoline(i8* %trampmem)
+ %function0 = bitcast i8* %tramp0 to i32(i32)*
+ %ret0 = call i32 %function0(i32 %n)
+
+ ;; Not optimized since the previous call could be writing to the trampoline memory.
+ %tramp1 = call i8* @llvm.adjust.trampoline(i8* %trampmem)
+ %function1 = bitcast i8* %tramp1 to i32(i32)*
+ %ret1 = call i32 %function1(i32 %n)
+; CHECK: %ret1 = call i32 %function1(i32 %n)
+
+ ret i32 %ret1
+}
+
+define i32 @test4(i32 %n) {
+ %alloca = alloca [10 x i8], align 16
+ %gep = getelementptr [10 x i8], [10 x i8]* %alloca, i32 0, i32 0
+ call void @llvm.init.trampoline(i8* %gep, i8* bitcast (i32 (i8*, i32)* @f to i8*),
+ i8* null)
+
+ %tramp0 = call i8* @llvm.adjust.trampoline(i8* %gep)
+ %function0 = bitcast i8* %tramp0 to i32(i32)*
+ %ret0 = call i32 %function0(i32 %n)
+
+ %tramp1 = call i8* @llvm.adjust.trampoline(i8* %gep)
+ %function1 = bitcast i8* %tramp0 to i32(i32)*
+ %ret1 = call i32 %function1(i32 %n)
+
+ %tramp2 = call i8* @llvm.adjust.trampoline(i8* %gep)
+ %function2 = bitcast i8* %tramp2 to i32(i32)*
+ %ret2 = call i32 %function2(i32 %n)
+
+ ret i32 %ret2
+
+; CHECK: define i32 @test4(i32 %n) {
+; CHECK: %ret0 = call i32 @f(i8* nest null, i32 %n)
+; CHECK: %ret1 = call i32 @f(i8* nest null, i32 %n)
+; CHECK: %ret2 = call i32 @f(i8* nest null, i32 %n)
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2)
+!1 = !DIFile(filename: "string.h", directory: "Game")
+!2 = !{}
+!3 = !{i32 1, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "passthru", scope: !1, file: !1, line: 79, type: !5, isLocal: true, isDefinition: true, scopeLine: 79, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !0, baseType: null, size: 64, align: 64)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 78, type: !7)
+!10 = !DILocation(line: 78, column: 28, scope: !4)
diff --git a/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll b/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll
new file mode 100644
index 00000000000..122669ec04a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; rdar://problem/10063307
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+%0 = type { [2 x i32] }
+%struct.CGPoint = type { float, float }
+
+define void @t(%struct.CGPoint* %a) nounwind {
+ %Point = alloca %struct.CGPoint, align 4
+ %1 = bitcast %struct.CGPoint* %a to i64*
+ %2 = bitcast %struct.CGPoint* %Point to i64*
+ %3 = load i64, i64* %1, align 4
+ store i64 %3, i64* %2, align 4
+ call void @foo(i64* %2) nounwind
+ ret void
+; CHECK: %Point = alloca i64, align 4
+}
+
+declare void @foo(i64*)
diff --git a/llvm/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll b/llvm/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
new file mode 100644
index 00000000000..abab9dc5702
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%opaque_struct = type opaque
+
+@G = external global [0 x %opaque_struct]
+
+declare void @foo(%opaque_struct*)
+
+define void @bar() {
+ call void @foo(%opaque_struct* bitcast ([0 x %opaque_struct]* @G to %opaque_struct*))
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/llvm/test/Transforms/InstCombine/2012-02-13-FCmp.ll
new file mode 100644
index 00000000000..586f86de3c2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -0,0 +1,35 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; Radar 10803727
+@.str = private unnamed_addr constant [35 x i8] c"\0Ain_range input (should be 0): %f\0A\00", align 1
+@.str1 = external hidden unnamed_addr constant [35 x i8], align 1
+
+declare i32 @printf(i8*, ...)
+define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
+entry:
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+ %conv = uitofp i32 %val to double
+ %call.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str, i64 0, i64 0), double %conv)
+ %cmp.i = fcmp oge double %conv, -1.000000e+00
+ br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
+; CHECK: br i1 true, label %land.rhs.i, label %if.end.critedge
+
+land.rhs.i: ; preds = %entry
+ %cmp1.i = fcmp olt double %conv, 1.000000e+00
+ br i1 %cmp1.i, label %if.then, label %if.end
+
+if.then: ; preds = %land.rhs.i
+ %add = fadd double %conv, 5.000000e-01
+ %conv3 = fptosi double %add to i64
+ br label %return
+
+if.end.critedge: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.end.critedge, %land.rhs.i
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval.0 = phi i64 [ %conv3, %if.then ], [ -1, %if.end ]
+ ret i64 %retval.0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll
new file mode 100644
index 00000000000..82cf85fa4cd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-02-28-ICmp.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10803154>
+
+; There should be no transformation.
+; CHECK: %a = trunc i32 %x to i8
+; CHECK: %b = icmp ne i8 %a, 0
+; CHECK: %c = and i32 %x, 16711680
+; CHECK: %d = icmp ne i32 %c, 0
+; CHECK: %e = and i1 %b, %d
+; CHECK: ret i1 %e
+
+define i1 @f1(i32 %x) {
+ %a = trunc i32 %x to i8
+ %b = icmp ne i8 %a, 0
+ %c = and i32 %x, 16711680
+ %d = icmp ne i32 %c, 0
+ %e = and i1 %b, %d
+ ret i1 %e
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
new file mode 100644
index 00000000000..d1860bccd75
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+; Derived from gcc.c-torture/execute/frame-address.c
+
+; CHECK-LABEL: @func(
+; CHECK: return:
+; CHECK-NOT: ret i32 0
+; CHECK: ret i32 %retval
+
+define i32 @func(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp {
+entry:
+ %d = alloca i8, align 1
+ store i8 0, i8* %d, align 1
+ %cmp = icmp ugt i8* %d, %c
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %cmp2 = icmp ule i8* %d, %f
+ %not.cmp1 = icmp uge i8* %c, %f
+ %.cmp2 = and i1 %cmp2, %not.cmp1
+ %land.ext = zext i1 %.cmp2 to i32
+ br label %return
+
+if.else: ; preds = %entry
+ %cmp5 = icmp uge i8* %d, %f
+ %not.cmp3 = icmp ule i8* %c, %f
+ %.cmp5 = and i1 %cmp5, %not.cmp3
+ %land.ext7 = zext i1 %.cmp5 to i32
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %retval.0 = phi i32 [ %land.ext, %if.then ], [ %land.ext7, %if.else ]
+ ret i32 %retval.0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2012-04-24-vselect.ll b/llvm/test/Transforms/InstCombine/2012-04-24-vselect.ll
new file mode 100644
index 00000000000..211d401a3bc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-04-24-vselect.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: @foo(
+; CHECK: <i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+define <8 x i32> @foo() nounwind {
+entry:
+ %v1.i = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>,
+ <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>,
+ <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x i32> %v1.i
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2012-04-30-SRem.ll b/llvm/test/Transforms/InstCombine/2012-04-30-SRem.ll
new file mode 100644
index 00000000000..a285d5aea5e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-04-30-SRem.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; PR12541
+
+define i32 @foo(i32 %x) {
+ %y = xor i32 %x, 3
+ %z = srem i32 1656690544, %y
+ %sext = shl i32 %z, 24
+ %s = ashr exact i32 %sext, 24
+ ret i32 %s
+; CHECK-NOT: and
+; The shifts were wrongly being turned into an and with 112
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll b/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll
new file mode 100644
index 00000000000..c514dd1f5ec
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@c = common global i8 0, align 1
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @func() nounwind uwtable ssp {
+entry:
+ %0 = load i8, i8* @c, align 1
+ %conv = zext i8 %0 to i32
+ %or = or i32 %conv, 1
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* @a, align 1
+ %conv2 = zext i8 %conv1 to i32
+ %neg = xor i32 %conv2, -1
+ %and = and i32 1, %neg
+ %conv3 = trunc i32 %and to i8
+ store i8 %conv3, i8* @b, align 1
+ %1 = load i8, i8* @a, align 1
+ %conv4 = zext i8 %1 to i32
+ %conv5 = zext i8 %conv3 to i32
+ %tobool = icmp ne i32 %conv4, 0
+ br i1 %tobool, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %entry
+ %tobool8 = icmp ne i32 %conv5, 0
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %2 = phi i1 [ false, %entry ], [ %tobool8, %land.rhs ]
+ %land.ext = zext i1 %2 to i32
+ %mul = mul nsw i32 3, %land.ext
+ %conv9 = trunc i32 %mul to i8
+ store i8 %conv9, i8* @a, align 1
+ ret void
+
+; CHECK-LABEL: @func(
+; CHECK-NOT: select
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll b/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
new file mode 100644
index 00000000000..4af1ca842c5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
@@ -0,0 +1,162 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10889741>
+
+define void @func(double %r, double %g, double %b, double* %outH, double* %outS, double* %outL) nounwind uwtable ssp {
+bb:
+ %tmp = alloca double, align 8
+ %tmp1 = alloca double, align 8
+ %tmp2 = alloca double, align 8
+ store double %r, double* %tmp, align 8
+ store double %g, double* %tmp1, align 8
+ store double %b, double* %tmp2, align 8
+ %tmp3 = fcmp ogt double %r, %g
+ br i1 %tmp3, label %bb4, label %bb8
+
+bb4: ; preds = %bb
+ %tmp5 = fcmp ogt double %r, %b
+ br i1 %tmp5, label %bb6, label %bb7
+
+bb6: ; preds = %bb4
+ br label %bb12
+
+bb7: ; preds = %bb4
+ br label %bb12
+
+bb8: ; preds = %bb
+ %tmp9 = fcmp ogt double %g, %b
+ br i1 %tmp9, label %bb10, label %bb11
+
+bb10: ; preds = %bb8
+ br label %bb12
+
+bb11: ; preds = %bb8
+ br label %bb12
+
+bb12: ; preds = %bb11, %bb10, %bb7, %bb6
+ %max.0 = phi double* [ %tmp, %bb6 ], [ %tmp2, %bb7 ], [ %tmp1, %bb10 ], [ %tmp2, %bb11 ]
+; CHECK: %tmp13 = load double, double* %tmp, align 8
+; CHECK: %tmp14 = load double, double* %tmp1, align 8
+; CHECK: %tmp15 = fcmp olt double %tmp13, %tmp14
+ %tmp13 = load double, double* %tmp, align 8
+ %tmp14 = load double, double* %tmp1, align 8
+ %tmp15 = fcmp olt double %tmp13, %tmp14
+ br i1 %tmp15, label %bb16, label %bb21
+
+bb16: ; preds = %bb12
+ %tmp17 = load double, double* %tmp2, align 8
+ %tmp18 = fcmp olt double %tmp13, %tmp17
+ br i1 %tmp18, label %bb19, label %bb20
+
+bb19: ; preds = %bb16
+ br label %bb26
+
+bb20: ; preds = %bb16
+ br label %bb26
+
+bb21: ; preds = %bb12
+ %tmp22 = load double, double* %tmp2, align 8
+ %tmp23 = fcmp olt double %tmp14, %tmp22
+ br i1 %tmp23, label %bb24, label %bb25
+
+bb24: ; preds = %bb21
+ br label %bb26
+
+bb25: ; preds = %bb21
+ br label %bb26
+
+bb26: ; preds = %bb25, %bb24, %bb20, %bb19
+ %min.0 = phi double* [ %tmp, %bb19 ], [ %tmp2, %bb20 ], [ %tmp1, %bb24 ], [ %tmp2, %bb25 ]
+; CHECK: %tmp27 = load double, double* %min.0, align 8
+; CHECK: %tmp28 = load double, double* %max.0
+; CHECK: %tmp29 = fadd double %tmp27, %tmp28
+ %tmp27 = load double, double* %min.0, align 8
+ %tmp28 = load double, double* %max.0
+ %tmp29 = fadd double %tmp27, %tmp28
+ %tmp30 = fdiv double %tmp29, 2.000000e+00
+ store double %tmp30, double* %outL
+ %tmp31 = load double, double* %min.0
+ %tmp32 = load double, double* %max.0
+ %tmp33 = fcmp oeq double %tmp31, %tmp32
+ br i1 %tmp33, label %bb34, label %bb35
+
+bb34: ; preds = %bb26
+ store double 0.000000e+00, double* %outS
+ store double 0.000000e+00, double* %outH
+ br label %bb81
+
+bb35: ; preds = %bb26
+ %tmp36 = fcmp olt double %tmp30, 5.000000e-01
+ %tmp37 = fsub double %tmp32, %tmp31
+ br i1 %tmp36, label %bb38, label %bb41
+
+bb38: ; preds = %bb35
+ %tmp39 = fadd double %tmp32, %tmp31
+ %tmp40 = fdiv double %tmp37, %tmp39
+ store double %tmp40, double* %outS
+ br label %bb45
+
+bb41: ; preds = %bb35
+ %tmp42 = fsub double 2.000000e+00, %tmp32
+ %tmp43 = fsub double %tmp42, %tmp31
+ %tmp44 = fdiv double %tmp37, %tmp43
+ store double %tmp44, double* %outS
+ br label %bb45
+
+bb45: ; preds = %bb41, %bb38
+ %tmp46 = icmp eq double* %max.0, %tmp
+ br i1 %tmp46, label %bb47, label %bb55
+
+bb47: ; preds = %bb45
+ %tmp48 = load double, double* %tmp1, align 8
+ %tmp49 = load double, double* %tmp2, align 8
+ %tmp50 = fsub double %tmp48, %tmp49
+ %tmp51 = load double, double* %max.0
+ %tmp52 = load double, double* %min.0
+ %tmp53 = fsub double %tmp51, %tmp52
+ %tmp54 = fdiv double %tmp50, %tmp53
+ store double %tmp54, double* %outH
+ br label %bb75
+
+bb55: ; preds = %bb45
+ %tmp56 = icmp eq double* %max.0, %tmp1
+ br i1 %tmp56, label %bb57, label %bb66
+
+bb57: ; preds = %bb55
+ %tmp58 = load double, double* %tmp2, align 8
+ %tmp59 = load double, double* %tmp, align 8
+ %tmp60 = fsub double %tmp58, %tmp59
+ %tmp61 = load double, double* %max.0
+ %tmp62 = load double, double* %min.0
+ %tmp63 = fsub double %tmp61, %tmp62
+ %tmp64 = fdiv double %tmp60, %tmp63
+ %tmp65 = fadd double 2.000000e+00, %tmp64
+ store double %tmp65, double* %outH
+ br label %bb75
+
+bb66: ; preds = %bb55
+ %tmp67 = load double, double* %tmp, align 8
+ %tmp68 = load double, double* %tmp1, align 8
+ %tmp69 = fsub double %tmp67, %tmp68
+ %tmp70 = load double, double* %max.0
+ %tmp71 = load double, double* %min.0
+ %tmp72 = fsub double %tmp70, %tmp71
+ %tmp73 = fdiv double %tmp69, %tmp72
+ %tmp74 = fadd double 4.000000e+00, %tmp73
+ store double %tmp74, double* %outH
+ br label %bb75
+
+bb75: ; preds = %bb66, %bb57, %bb47
+ %tmp76 = load double, double* %outH
+ %tmp77 = fdiv double %tmp76, 6.000000e+00
+ store double %tmp77, double* %outH
+ %tmp78 = fcmp olt double %tmp77, 0.000000e+00
+ br i1 %tmp78, label %bb79, label %bb81
+
+bb79: ; preds = %bb75
+ %tmp80 = fadd double %tmp77, 1.000000e+00
+ store double %tmp80, double* %outH
+ br label %bb81
+
+bb81: ; preds = %bb79, %bb75, %bb34
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
new file mode 100644
index 00000000000..71255ebbf81
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -data-layout="e-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -data-layout="E-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=BE
+; PR13442
+
+@test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+define i64 @foo() {
+ %ret = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1
+ ret i64 %ret
+ ; 0x00030000_00020000 in [01 00/00 00 02 00 00 00 03 00/00 00 04 00 00 00]
+ ; LE: ret i64 844424930263040
+ ; 0x00000200_00000300 in [00 00/00 01 00 00 00 02 00 00/00 03 00 00 00 04]
+ ; BE: ret i64 281474976841728
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/llvm/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
new file mode 100644
index 00000000000..66653806788
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: addrspacecast
+
+@base = internal unnamed_addr addrspace(3) global [16 x i32] zeroinitializer, align 16
+declare void @foo(i32*)
+
+define void @test() nounwind {
+ call void @foo(i32* getelementptr (i32, i32* addrspacecast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll b/llvm/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
new file mode 100644
index 00000000000..0374bd52afd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; rdar://12182093
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: @udiv400(
+; CHECK: udiv i32 %x, 400
+; CHECK: ret
+define i32 @udiv400(i32 %x) {
+entry:
+ %div = lshr i32 %x, 2
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
+
+
+; CHECK-LABEL: @udiv400_no(
+; CHECK: ashr
+; CHECK: div
+; CHECK: ret
+define i32 @udiv400_no(i32 %x) {
+entry:
+ %div = ashr i32 %x, 2
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
+
+; CHECK-LABEL: @sdiv400_yes(
+; CHECK: udiv i32 %x, 400
+; CHECK: ret
+define i32 @sdiv400_yes(i32 %x) {
+entry:
+ %div = lshr i32 %x, 2
+ ; The sign bits of both operands are zero (i.e. we can prove they are
+ ; unsigned inputs), so turn this into a udiv.
+ ; Next, optimize this just like sdiv.
+ %div1 = sdiv i32 %div, 100
+ ret i32 %div1
+}
+
+
+; CHECK-LABEL: @udiv_i80(
+; CHECK: udiv i80 %x, 400
+; CHECK: ret
+define i80 @udiv_i80(i80 %x) {
+ %div = lshr i80 %x, 2
+ %div1 = udiv i80 %div, 100
+ ret i80 %div1
+}
+
+define i32 @no_crash_notconst_udiv(i32 %x, i32 %notconst) {
+ %div = lshr i32 %x, %notconst
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll b/llvm/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
new file mode 100644
index 00000000000..1c5a9813e6b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; When merging zero-sized allocas, check that the requested alignments of the
+; allocas are obeyed.
+
+@x = global i8* null, align 8
+@y = global i8* null, align 8
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK-LABEL: @f(
+; CHECK-NEXT: alloca [0 x i8], align 1024
+; CHECK-NOT: alloca
+; CHECK: ret void
+define void @f() {
+ %1 = alloca [0 x i8], align 1
+ %2 = alloca [0 x i8], align 1024
+ %3 = getelementptr inbounds [0 x i8], [0 x i8]* %1, i64 0, i64 0
+ %4 = getelementptr inbounds [0 x i8], [0 x i8]* %2, i64 0, i64 0
+ store i8* %3, i8** @x, align 8
+ store i8* %4, i8** @y, align 8
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll b/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
new file mode 100644
index 00000000000..23210650e2f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -S
+
+; Make sure that we don't crash when optimizing vectors of pointers.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.hoge = type { double*, double*, double*, double** }
+
+define void @widget(%struct.hoge* nocapture %arg) nounwind uwtable ssp {
+bb:
+ %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i64 0, i32 0
+ br i1 undef, label %bb1, label %bb17
+
+bb1: ; preds = %bb
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ br label %bb17
+
+bb3: ; preds = %bb1
+ %tmp4 = bitcast double** %tmp to <2 x double*>*
+ %tmp5 = load <2 x double*>, <2 x double*>* %tmp4, align 8
+ %tmp6 = ptrtoint <2 x double*> %tmp5 to <2 x i64>
+ %tmp7 = sub <2 x i64> zeroinitializer, %tmp6
+ %tmp8 = ashr exact <2 x i64> %tmp7, <i64 3, i64 3>
+ %tmp9 = extractelement <2 x i64> %tmp8, i32 0
+ %tmp10 = add nsw i64 undef, %tmp9
+ br i1 undef, label %bb11, label %bb12
+
+bb11: ; preds = %bb3
+ br label %bb13
+
+bb12: ; preds = %bb3
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb11
+ br i1 undef, label %bb16, label %bb14
+
+bb14: ; preds = %bb13
+ br i1 undef, label %bb16, label %bb15
+
+bb15: ; preds = %bb14
+ br label %bb16
+
+bb16: ; preds = %bb15, %bb14, %bb13
+ unreachable
+
+bb17: ; preds = %bb2, %bb
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll b/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
new file mode 100644
index 00000000000..46702f80c0c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define <4 x i32> @foo(<4 x i32*>* %in) {
+ %t17 = load <4 x i32*>, <4 x i32*>* %in, align 8
+ %t18 = icmp eq <4 x i32*> %t17, zeroinitializer
+ %t19 = zext <4 x i1> %t18 to <4 x i32>
+ ret <4 x i32> %t19
+}
diff --git a/llvm/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll b/llvm/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
new file mode 100644
index 00000000000..466629cb5fd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR12234
+
+@g = extern_weak global i32
+define i32 @function(i32 %x) nounwind {
+entry:
+ %xor = xor i32 %x, 1
+ store volatile i32 %xor, i32* inttoptr (i64 1 to i32*), align 4
+ %or4 = or i32 or (i32 zext (i1 icmp eq (i32* @g, i32* null) to i32), i32 1), %xor
+ ret i32 %or4
+}
+; CHECK-LABEL: define i32 @function(
diff --git a/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll b/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll
new file mode 100644
index 00000000000..cb527f86406
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2012-6-7-vselect-bitcast.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: bitcast
+
+define void @foo(<16 x i8> %a, <16 x i8> %b, <4 x i32>* %c) {
+ %aa = bitcast <16 x i8> %a to <4 x i32>
+ %bb = bitcast <16 x i8> %b to <4 x i32>
+ %select_v = select <4 x i1> zeroinitializer, <4 x i32> %aa, <4 x i32> %bb
+ store <4 x i32> %select_v, <4 x i32>* %c, align 4
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
new file mode 100644
index 00000000000..9425c29a427
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._my_struct = type <{ [12 x i8], [4 x i8] }>
+
+@initval = common global %struct._my_struct zeroinitializer, align 1
+
+; InstCombine will try to change the %struct._my_struct alloca into an
+; allocation of an i96 because of the bitcast to create %2. That's not valid,
+; as the other 32 bits of the structure still feed into the return value.
+define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK-LABEL: @function(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
+; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
+entry:
+ %retval = alloca %struct._my_struct, align 8
+ %k.sroa.0.0.copyload = load i96, i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+ %k.sroa.1.12.copyload = load i32, i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct, %struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+ %0 = zext i32 %x to i96
+ %bf.value = shl nuw nsw i96 %0, 6
+ %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
+ %1 = zext i32 %y to i96
+ %bf.value2 = shl nuw nsw i96 %1, 32
+ %bf.shl3 = and i96 %bf.value2, 288230371856744448
+ %bf.value.masked = and i96 %bf.value, 4294967232
+ %2 = zext i32 %z to i96
+ %bf.value8 = and i96 %2, 63
+ %bf.clear4 = or i96 %bf.shl3, %bf.value.masked
+ %bf.set5 = or i96 %bf.clear4, %bf.value8
+ %bf.set10 = or i96 %bf.set5, %bf.clear
+ %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96*
+ store i96 %bf.set10, i96* %retval.0.cast7, align 8
+ %retval.12.idx8 = getelementptr inbounds %struct._my_struct, %struct._my_struct* %retval, i64 0, i32 1
+ %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32*
+ store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4
+ %trunc = trunc i96 %bf.set10 to i64
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
+ %retval.8.idx12 = getelementptr inbounds %struct._my_struct, %struct._my_struct* %retval, i64 0, i32 0, i64 8
+ %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
+ %retval.8.load14 = load i64, i64* %retval.8.cast13, align 8
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
+ ret { i64, i64 } %.fca.1.insert
+}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
new file mode 100644
index 00000000000..04fb7d91193
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
@@ -0,0 +1,71 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 neon intrinsic variants - <rdar://problem/12349617>
+; REQUIRES: aarch64
+
+define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMulARM64() nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
+entry:
+ %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
+entry:
+ %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+ %b = add <4 x i32> zeroinitializer, %a
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ %b = add <4 x i32> %x, %a
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
new file mode 100644
index 00000000000..c38385907be
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
+
diff --git a/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg b/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg
new file mode 100644
index 00000000000..7184443994b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'AArch64' not in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll b/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll
new file mode 100644
index 00000000000..176e8d0a3dc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/tbl1.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+; Turning a table lookup intrinsic into a shuffle vector instruction
+; can be beneficial. If the mask used for the lookup is the constant
+; vector {7,6,5,4,3,2,1,0}, then the back-end generates rev64
+; instructions instead of a tbl1 instruction.
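+;
+; A minimal before/after sketch of the fold, mirroring the checks in
+; @tbl1_8x8 below (the exact canonical output is whatever InstCombine emits):
+;   %tbl1 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vec, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+;     ==>
+;   %tbl1 = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>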
+
+define <8 x i8> @tbl1_8x8(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[VEC:%.*]], <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP0]]
+;
+entry:
+ %tbl1 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vec, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbl1
+}
+
+; Bail out of the optimization if a mask index is out of range.
+define <8 x i8> @tbl1_8x8_out_of_range(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x8_out_of_range(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TBL1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VEC:%.*]], <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; CHECK-NEXT: ret <8 x i8> [[TBL1]]
+;
+entry:
+ %tbl1 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vec, <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %tbl1
+}
+
+; Bail out of the optimization if the return vector does not have exactly 8 elements.
+define <16 x i8> @tbl1_16x8(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_16x8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TBL1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[VEC:%.*]], <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; CHECK-NEXT: ret <16 x i8> [[TBL1]]
+;
+entry:
+ %tbl1 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %vec, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %tbl1
+}
+
+; Bail out of the optimization if the elements of the return vector are not of type i8.
+define <8 x i16> @tbl1_8x16(<16 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TBL1:%.*]] = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> [[VEC:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+; CHECK-NEXT: ret <8 x i16> [[TBL1]]
+;
+entry:
+ %tbl1 = call <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8> %vec, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %tbl1
+}
+
+; The type <8 x i16> is not a valid return type for this intrinsic,
+; but we want to test that the optimization won't trigger for vector
+; elements of a type other than i8.
+declare <8 x i16> @llvm.aarch64.neon.tbl1.v8i16(<16 x i8>, <8 x i16>)
+
+declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>)
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
new file mode 100644
index 00000000000..9c45cf5aa11
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -0,0 +1,2407 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.buffer.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @buffer_load_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ ret float %data
+}
+
+; CHECK-LABEL: @buffer_load_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ ret <2 x float> %data
+}
+
+; CHECK-LABEL: @buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v2f32(
+; CHECK: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <2 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <4 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <4 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <4 x float> %data, i32 3
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; FIXME: Not handled even though only 2 elts used
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
+; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0
+; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
+; CHECK-NEXT: ret { float, float } %ins1
+define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ %elt1 = extractelement <4 x float> %data, i32 1
+ %ins0 = insertvalue { float, float } undef, float %elt0, 0
+ %ins1 = insertvalue { float, float } %ins0, float %elt1, 1
+ ret { float, float } %ins1
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt0 = extractelement <3 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <3 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %elt1 = extractelement <3 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_buffer_load_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false), !fpmath !0
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <1 x float> @llvm.amdgcn.buffer.load.v1f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.buffer.load.format
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @buffer_load_format_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 true)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 true, i1 false)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; The initial insertion point is at the extractelement
+; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
+; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
+; CHECK-NEXT: ret double %tmp2
+define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
+ %tmp1 = bitcast <4 x float> %tmp to <2 x double>
+ %tmp2 = extractelement <2 x double> %tmp1, i32 0
+ ret double %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+ ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract_lo16_0_bitcast_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
+; CHECK-NEXT: %1 = bitcast float %tmp to i32
+; CHECK-NEXT: %tmp2 = trunc i32 %1 to i16
+; CHECK-NEXT: ret i16 %tmp2
+define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3
+ %tmp1 = bitcast <4 x float> %tmp to <8 x i16>
+ %tmp2 = extractelement <8 x i16> %tmp1, i32 0
+ ret i16 %tmp2
+}
+
+declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <1 x float> @llvm.amdgcn.buffer.load.format.v1f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.raw.buffer.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @raw_buffer_load_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret float %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <2 x float> %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <2 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 3
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <3 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+ ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
+; CHECK-NEXT: ret float %tmp2
+define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp2 = extractelement <4 x float> %tmp1, i32 0
+ ret float %tmp2
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1
+declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1
+declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
+
+declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.raw.buffer.load.format
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @raw_buffer_load_format_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret float %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_format_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <2 x float> %data
+}
+
+; CHECK-LABEL: @raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
+; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <2 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 3
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <3 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+ ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
+; CHECK-NEXT: ret float %tmp2
+define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp2 = extractelement <4 x float> %tmp1, i32 0
+ ret float %tmp2
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1
+declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1
+declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
+
+declare <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.struct.buffer.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @struct_buffer_load_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret float %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <2 x float> %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
+; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <2 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 3
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <3 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
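+; A bitcast of the full result should not block the shrinking: the load is
+; scalarized and the bitcast is applied to the extracted scalar instead.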
+; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+ ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
+; CHECK-NEXT: ret float %tmp2
+define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp2 = extractelement <4 x float> %tmp1, i32 0
+ ret float %tmp2
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
+
+declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.struct.buffer.load.format
+; --------------------------------------------------------------------
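+; The format variants are expected to shrink the same way as the plain
+; struct.buffer.load tests above: the loaded vector is narrowed to cover only
+; the highest element that is actually used, down to a scalar load when only
+; element 0 is extracted.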
+
+; CHECK-LABEL: @struct_buffer_load_format_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret float %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_format_v1f32(
+; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <1 x float> %data
+define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <1 x float> %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <2 x float> %data
+}
+
+; CHECK-LABEL: @struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <4 x float> %data
+define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ ret <4 x float> %data
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <2 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <4 x float> %data, i32 3
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt0 = extractelement <3 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 1
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
+; CHECK-NEXT: ret float %elt1
+define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %elt1 = extractelement <3 x float> %data, i32 2
+ ret float %elt1
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: ret <2 x float>
+define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> %shuf
+define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
+; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
+; CHECK-NEXT: ret i32 %tmp2
+define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = extractelement <4 x i32> %tmp1, i32 0
+ ret i32 %tmp2
+}
+
+; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32(
+; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
+; CHECK-NEXT: ret float %tmp2
+define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
+ %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp2 = extractelement <4 x float> %tmp1, i32 0
+ ret float %tmp2
+}
+
+; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
+ %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
+ %elt0 = extractelement <2 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
+
+declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample
+; --------------------------------------------------------------------
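+; For the image sample intrinsics, unused result elements should narrow the
+; return type and rewrite the dmask so only the demanded channels are
+; requested.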
+
+; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; Check that the intrinsic remains unchanged in the presence of TFE or LWE:
+; the extra status dword in the struct return means the call is expected to
+; stay as-is even though only element 0 of the data is used.
+; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe(
+; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
+; CHECK: ret float %elt0
+define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0)
+ %data.vec = extractvalue {<4 x float>,i32} %data, 0
+ %elt0 = extractelement <4 x float> %data.vec, i32 0
+ ret float %elt0
+}
+
+; Check that the intrinsic remains unchanged in the presence of TFE or LWE
+; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe(
+; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
+; CHECK: ret float %elt0
+define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0)
+ %data.vec = extractvalue {<4 x float>,i32} %data, 0
+ %elt0 = extractelement <4 x float> %data.vec, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
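+; A dmask of 0 defines no channels, so the extracted element folds to undef.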
+; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
+; CHECK-NEXT: ret float undef
+define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %1 = insertelement <2 x float> undef, float %data, i32 0
+; CHECK-NEXT: ret <2 x float> %1
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %1 = insertelement <3 x float> undef, float %data, i32 0
+; CHECK-NEXT: ret <3 x float> %1
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %shuf
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 1
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 2
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 3
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <2 x float> %data, i32 1
+ ret float %elt0
+}
+
+declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 1
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+ ret <2 x float> %shuf
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
+ ret <2 x float> %shuf
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32(i32 14, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <3 x float> %data
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x float> %shuf
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.cl
+; --------------------------------------------------------------------
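+; The narrowing should also apply when the sample returns f16 elements.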
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
+; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret half %data
+define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x half> %data, i32 0
+ ret half %elt0
+}
+
+declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.d.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.cd.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.d.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample.c.cd.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4
+; --------------------------------------------------------------------
+
+; Don't handle gather4*: these intrinsics always return all four gathered texel values, so the result vector cannot be narrowed based on which elements are demanded.
+
+; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
+; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.l
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.cl
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.lz
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.l.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.b.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.b.cl.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.gather4.c.lz.o
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.getlod
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.load.mip
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.getresinfo
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
+ %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+
+; --------------------------------------------------------------------
+; TFE / LWE
+; --------------------------------------------------------------------
+
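+; When tfe/lwe (the last immediate operand) is set, the load also reports an i32 status value;
+; the struct-returning form below is left unchanged.
+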
+; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
+; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
+ %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+ %rgba = extractvalue { <4 x float>, i32 } %data, 0
+ %elt0 = extractelement <4 x float> %rgba, i32 0
+ ret float %elt0
+}
+
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1
+
+; CHECK-LABEL: @tfe_check_assert(
+; CHECK: %data = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1)
+; CHECK-NEXT: ret float %data
+define amdgpu_hs float @tfe_check_assert() #0 {
+ %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2
+ %elt0 = extractelement <4 x float> %data, i32 0
+ ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+; Attribute group #2 is referenced by the call in @tfe_check_assert (contents assumed).
+attributes #2 = { nounwind readnone }
+
+!0 = !{float 2.500000e+00}
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
new file mode 100644
index 00000000000..a065d10946d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -0,0 +1,2098 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.rcp
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
+
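+; For a constant operand, rcp is only expected to fold when the reciprocal is exact
+; (independent of rounding mode), so rcp of 1.0 and 0.5 fold below while rcp of 43.0 is
+; left as a call.
+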
+; CHECK-LABEL: @test_constant_fold_rcp_f32_undef
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_rcp_f32_undef() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT: ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT: ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
+ ret double %val
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.rsq
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rsq_f32_undef
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_rsq_f32_undef() nounwind {
+ %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
+ ret float %val
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.frexp.mant
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
+declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
+
+
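+; frexp.mant(x) yields the mantissa m with x = m * 2^exp and |m| in [0.5, 1.0) for finite
+; non-zero x, so frexp_mant(1.0) folds to 0.5; zero, NaN and infinities fold to themselves.
+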
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
+; CHECK-NEXT: ret double undef
+define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
+; CHECK-NEXT: ret float 0.000000e+00
+define float @test_constant_fold_frexp_mant_f32_0() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
+; CHECK-NEXT: ret double 0.000000e+00
+define double @test_constant_fold_frexp_mant_f64_0() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
+ ret double %val
+}
+
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
+; CHECK-NEXT: ret double -0.000000e+00
+define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
+; CHECK-NEXT: ret float 5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_1() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
+; CHECK-NEXT: ret double 5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_1() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
+; CHECK-NEXT: ret float -5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
+; CHECK-NEXT: ret double -5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
+; CHECK-NEXT: ret float 0x7FF0000000000000
+define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
+; CHECK-NEXT: ret double 0x7FF0000000000000
+define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
+; CHECK-NEXT: ret float 0xFFF0000000000000
+define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
+; CHECK-NEXT: ret double 0xFFF0000000000000
+define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
+; CHECK-NEXT: ret float 0x3FEFFFFFE0000000
+define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
+; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF
+define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
+; CHECK-NEXT: ret float 5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
+; CHECK-NEXT: ret double 5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
+ ret double %val
+}
+
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.frexp.exp
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
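+
+; frexp.exp(x) yields the exponent e with x = m * 2^e and |m| in [0.5, 1.0): 1024.0 gives 11
+; and 0.0009765625 (2^-10) gives -9; zero, NaN and infinities fold to 0.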
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
+; CHECK-NEXT: ret i32 undef
+define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
+; CHECK-NEXT: ret i32 undef
+define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
+; CHECK-NEXT: ret i32 -9
+define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
+; CHECK-NEXT: ret i32 -9
+define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
+; CHECK-NEXT: ret i32 128
+define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
+; CHECK-NEXT: ret i32 1024
+define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
+; CHECK-NEXT: ret i32 -148
+define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
+; CHECK-NEXT: ret i32 -1073
+define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
+ ret i32 %val
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.class
+; --------------------------------------------------------------------
+
+declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
+declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
+
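+; The mask operand selects which fp classes compare true. From the tests
+; below, the bits are: 1 snan, 2 qnan, 4 -inf, 8 -normal, 16 -subnormal,
+; 32 -0, 64 +0, 128 +subnormal, 256 +normal, 512 +inf; 1023 covers every
+; class, and bits above that range are expected to be dropped.
+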
+; CHECK-LABEL: @test_class_undef_mask_f32(
+; CHECK: ret i1 false
+define i1 @test_class_undef_mask_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_over_max_mask_f32(
+; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1)
+define i1 @test_class_over_max_mask_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_no_mask_f32(
+; CHECK: ret i1 false
+define i1 @test_class_no_mask_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_full_mask_f32(
+; CHECK: ret i1 true
+define i1 @test_class_full_mask_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_undef_no_mask_f32(
+; CHECK: ret i1 false
+define i1 @test_class_undef_no_mask_f32() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_undef_full_mask_f32(
+; CHECK: ret i1 true
+define i1 @test_class_undef_full_mask_f32() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_undef_val_f32(
+; CHECK: ret i1 undef
+define i1 @test_class_undef_val_f32() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_undef_undef_f32(
+; CHECK: ret i1 undef
+define i1 @test_class_undef_undef_f32() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_var_mask_f32(
+; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
+define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_isnan_f32(
+; CHECK: %val = fcmp uno float %x, 0.000000e+00
+define i1 @test_class_isnan_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_is_p0_n0_f32(
+; CHECK: %val = fcmp oeq float %x, 0.000000e+00
+define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
+ %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_snan_test_snan_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
+; CHECK: ret i1 true
+define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
+; CHECK: ret i1 false
+define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
+ %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
+ ret i1 %val
+}
+
+; CHECK-LABEL: @test_class_is_snan_nnan_src(
+; CHECK-NEXT: ret i1 false
+define i1 @test_class_is_snan_nnan_src(float %x) {
+ %nnan = fadd nnan float %x, 1.0
+ %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1)
+ ret i1 %class
+}
+
+; CHECK-LABEL: @test_class_is_qnan_nnan_src(
+; CHECK-NEXT: ret i1 false
+define i1 @test_class_is_qnan_nnan_src(float %x) {
+ %nnan = fadd nnan float %x, 1.0
+ %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
+ ret i1 %class
+}
+
+; CHECK-LABEL: @test_class_is_nan_nnan_src(
+; CHECK-NEXT: ret i1 false
+define i1 @test_class_is_nan_nnan_src(float %x) {
+ %nnan = fadd nnan float %x, 1.0
+ %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
+ ret i1 %class
+}
+
+; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
+; CHECK-NEXT: %nnan = fadd nnan float %x, 1.000000e+00
+; CHECK-NEXT: %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 264)
+define i1 @test_class_is_nan_other_nnan_src(float %x) {
+ %nnan = fadd nnan float %x, 1.0
+ %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
+ ret i1 %class
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cos
+; --------------------------------------------------------------------
+declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; CHECK-LABEL: @cos_fneg_f32(
+; CHECK: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fneg_f32(float %x) {
+ %x.fneg = fsub float -0.0, %x
+ %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
+ ret float %cos
+}
+
+; CHECK-LABEL: @cos_fabs_f32(
+; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fabs_f32(float %x) {
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
+ ret float %cos
+}
+
+; CHECK-LABEL: @cos_fabs_fneg_f32(
+; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
+; CHECK-NEXT: ret float %cos
+define float @cos_fabs_fneg_f32(float %x) {
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %x.fabs.fneg = fsub float -0.0, %x.fabs
+ %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
+ ret float %cos
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pkrtz
+; --------------------------------------------------------------------
+
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+
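+; cvt.pkrtz packs two f32 values into a <2 x half> with round-toward-zero, so
+; 65535.0 is expected to fold to 0xH7BFF (65504.0, the largest finite half)
+; rather than rounding up to infinity.
+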
+; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %y)
+define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.000000e+00)
+define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
+; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pkrtz(
+; CHECK: ret <2 x half> undef
+define <2 x half> @undef_cvt_pkrtz() {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
+; CHECK: ret <2 x half> zeroinitializer
+define <2 x half> @constant_splat0_cvt_pkrtz() {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
+ ret <2 x half> %cvt
+}
+
+; CHECK-LABEL: @constant_cvt_pkrtz(
+; CHECK: ret <2 x half> <half 0xH4000, half 0xH4400>
+define <2 x half> @constant_cvt_pkrtz() {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
+ ret <2 x half> %cvt
+}
+
+; Test constant values where the round-toward-zero conversion changes the result.
+; CHECK-LABEL: @constant_rtz_pkrtz(
+; CHECK: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
+define <2 x half> @constant_rtz_pkrtz() {
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
+ ret <2 x half> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pknorm.i16
+; --------------------------------------------------------------------
+
+declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone
+
+; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
+define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
+define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pknorm_i16(
+; CHECK: ret <2 x i16> undef
+define <2 x i16> @undef_cvt_pknorm_i16() {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
+ ret <2 x i16> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pknorm.u16
+; --------------------------------------------------------------------
+
+declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone
+
+; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
+define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
+define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pknorm_u16(
+; CHECK: ret <2 x i16> undef
+define <2 x i16> @undef_cvt_pknorm_u16() {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
+ ret <2 x i16> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pk.i16
+; --------------------------------------------------------------------
+
+declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone
+
+; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
+define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
+define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pk_i16(
+; CHECK: ret <2 x i16> undef
+define <2 x i16> @undef_cvt_pk_i16() {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
+ ret <2 x i16> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.cvt.pk.u16
+; --------------------------------------------------------------------
+
+declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone
+
+; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
+define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
+; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
+define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
+ ret <2 x i16> %cvt
+}
+
+; CHECK-LABEL: @undef_cvt_pk_u16(
+; CHECK: ret <2 x i16> undef
+define <2 x i16> @undef_cvt_pk_u16() {
+ %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
+ ret <2 x i16> %cvt
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.ubfe
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
+
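+; ubfe extracts an unsigned bitfield of 'width' bits starting at 'offset'.
+; Per the tests below, both operands are interpreted modulo the source bit
+; width (offset 133 becomes 5 for i32) and a width of 0 folds to 0.
+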
+; CHECK-LABEL: @ubfe_var_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 5, i32 %width)
+define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 5)
+define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_0(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_31(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+define i32 @ubfe_width_31(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_32(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_width_32(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_width_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 1)
+define i32 @ubfe_width_33(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_33(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 1, i32 %width)
+define i32 @ubfe_offset_33(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
+define i32 @ubfe_offset_0(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_32(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
+define i32 @ubfe_offset_32(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_31(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
+define i32 @ubfe_offset_31(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_width_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_offset_0_width_0(i32 %src) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_width_3(
+; CHECK-NEXT: and i32 %src, 7
+; CHECK-NEXT: ret
+define i32 @ubfe_offset_0_width_3(i32 %src) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_3_width_1(
+; CHECK-NEXT: %1 = lshr i32 %src, 3
+; CHECK-NEXT: and i32 %1, 1
+; CHECK-NEXT: ret i32
+define i32 @ubfe_offset_3_width_1(i32 %src) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_3_width_4(
+; CHECK-NEXT: %1 = lshr i32 %src, 3
+; CHECK-NEXT: and i32 %1, 15
+; CHECK-NEXT: ret i32
+define i32 @ubfe_offset_3_width_4(i32 %src) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_0_0_0(
+; CHECK-NEXT: ret i32 0
+define i32 @ubfe_0_0_0() {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_neg1_5_7(
+; CHECK-NEXT: ret i32 127
+define i32 @ubfe_neg1_5_7() {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_src_i32(
+; CHECK-NEXT: ret i32 undef
+define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_offset_i32(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
+define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_undef_width_i32(
+; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
+define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
+ %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
+; CHECK-NEXT: %1 = lshr i64 %src, 33
+; CHECK-NEXT: %bfe = and i64 %1, 15
+define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
+ %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
+ ret i64 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_0_i64(
+; CHECK-NEXT: %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
+define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
+ %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
+ ret i64 %bfe
+}
+
+; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
+; CHECK-NEXT: %bfe = lshr i64 %src, 32
+; CHECK-NEXT: ret i64 %bfe
+define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
+ %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
+ ret i64 %bfe
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.sbfe
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
+
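+; sbfe is the signed counterpart of ubfe: the extracted field is
+; sign-extended, so pulling 7 bits out of -1 is expected to give -1 here
+; rather than the 127 produced by ubfe above.
+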
+; CHECK-LABEL: @sbfe_offset_31(
+; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
+define i32 @sbfe_offset_31(i32 %src, i32 %width) {
+ %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @sbfe_neg1_5_7(
+; CHECK-NEXT: ret i32 -1
+define i32 @sbfe_neg1_5_7() {
+ %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
+ ret i32 %bfe
+}
+
+; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
+; CHECK-NEXT: %bfe = ashr i64 %src, 32
+; CHECK-NEXT: ret i64 %bfe
+define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
+ %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
+ ret i64 %bfe
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.exp
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
+
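+; The second operand is an enable bitmask where bit N corresponds to srcN.
+; The expectation below is that sources whose enable bit is clear get
+; rewritten to undef.
+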
+; CHECK-LABEL: @exp_disabled_inputs_to_undef(
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float undef, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float %y, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float %z, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float %w, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
+define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
+ ; enable src0..src3 constants
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+
+ ; enable src0..src3 variables
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+ ; enable none
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
+
+ ; enable different source combinations
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
+
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.exp.compr
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
+
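+; For compressed exports the enable bits act on <2 x half> operand pairs:
+; bits 0-1 cover the first operand and bits 2-3 the second, so en = 12 is
+; expected to keep only the second operand and en = 0 turns both into undef.
+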
+; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
+
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> %zw, i1 true, i1 false)
+; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
+
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fmed3
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+
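+; fmed3 selects the median of its three operands. When one operand is a known
+; nan (or undef), the tests below expect a fold to minnum of the other two if
+; the nan is in src0 or src1, and to maxnum if it is in src2.
+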
+; CHECK-LABEL: @fmed3_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+define float @fmed3_f32(float %x, float %y, float %z) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
+define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
+; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
+; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
+define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_undef_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_undef_x_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
+; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
+ %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_undef_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_undef_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_y_undef_f32(
+; CHECK: call float @llvm.maxnum.f32(float %x, float %y)
+define float @fmed3_x_y_undef_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
+; CHECK: call float @llvm.maxnum.f32(float %x, float %y)
+define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
+; CHECK: call float @llvm.minnum.f32(float %x, float %y)
+define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
+ ret float %med3
+}
+
+; This can return any of the qnans.
+; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
+; CHECK: ret float 0x7FF8030000000000
+define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src0_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src0_0_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src0_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src0_1_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src1_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src1_0_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src1_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src1_1_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src2_0_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src2_0_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_constant_src2_1_f32(
+; CHECK: ret float 5.000000e-01
+define float @fmed3_constant_src2_1_f32(float %x, float %y) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
+; CHECK: ret float %x
+define float @fmed3_x_qnan0_qnan1_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
+; CHECK: ret float %x
+define float @fmed3_qnan0_x_qnan1_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
+; CHECK: ret float %x
+define float @fmed3_qnan0_qnan1_x_f32(float %x) {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_nan_0_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_nan_0_1_f32() {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_0_nan_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_0_nan_1_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_0_1_nan_f32(
+; CHECK: ret float 1.0
+define float @fmed3_0_1_nan_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_undef_0_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_undef_0_1_f32() {
+ %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0)
+ ret float %med3
+}
+
+; CHECK-LABEL: @fmed3_0_undef_1_f32(
+; CHECK: ret float 0.0
+define float @fmed3_0_undef_1_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0)
+ ret float %med
+}
+
+; CHECK-LABEL: @fmed3_0_1_undef_f32(
+; CHECK: ret float 1.0
+define float @fmed3_0_1_undef_f32() {
+ %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
+ ret float %med
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.icmp
+; --------------------------------------------------------------------
+
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent
+
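+; The final immarg is a CmpInst predicate code: 32 = eq, 33 = ne, 34 = ugt,
+; 36 = ult, 38 = sgt, 40 = slt for the icmp forms, and the FCmpInst values
+; (1 = oeq, 4 = olt, 14 = une) when folding to llvm.amdgcn.fcmp. Codes outside
+; the valid range are left untouched, as in @invalid_icmp_code below.
+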
+; CHECK-LABEL: @invalid_icmp_code(
+; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
+; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
+define i64 @invalid_icmp_code(i32 %a, i32 %b) {
+ %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
+ %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
+ %or = or i64 %under, %over
+ ret i64 %or
+}
+
+; CHECK-LABEL: @icmp_constant_inputs_false(
+; CHECK: ret i64 0
+define i64 @icmp_constant_inputs_false() {
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
+ ret i64 %result
+}
+
+; CHECK-LABEL: @icmp_constant_inputs_true(
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
+define i64 @icmp_constant_inputs_true() {
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
+ ret i64 %result
+}
+
+; CHECK-LABEL: @icmp_constant_to_rhs_slt(
+; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38)
+define i64 @icmp_constant_to_rhs_slt(i32 %x) {
+ %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40)
+ ret i64 %result
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
+define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
+ %cmp = icmp ne i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41)
+define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
+ %cmp = icmp sle i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
+define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
+ %cmp = icmp ugt i64 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
+define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
+ %cmp = icmp ugt i64 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1)
+define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
+ %cmp = fcmp oeq float %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
+define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
+ %cmp = fcmp une float %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4)
+define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
+ %cmp = fcmp olt double %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
+; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %sext.cmp = sext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
+define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
+define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
+ %cmp = icmp slt i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
+define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
+ %cmp = fcmp oeq float %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2)
+define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
+ %cmp = fcmp ule float %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13)
+define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
+ %cmp = fcmp ogt float %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
+; CHECK: %zext.cond = zext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33)
+define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
+ %zext.cond = zext i1 %cond to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
+; CHECK: %zext.cond = zext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
+define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
+ %zext.cond = zext i1 %cond to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
+; CHECK: %sext.cond = sext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
+define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
+ %sext.cond = sext i1 %cond to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
+; CHECK: %sext.cond = sext i1 %cond to i32
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33)
+define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
+ %sext.cond = sext i1 %cond to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
+; CHECK: %sext.cond = sext i1 %cond to i64
+; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33)
+define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
+ %sext.cond = sext i1 %cond to i64
+ %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
+ ret i64 %mask
+}
+
+; TODO: Should be able to fold to false
+; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
+; CHECK: %cmp = icmp eq i32 %a, %b
+; CHECK: %sext.cmp = sext i1 %cmp to i32
+; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
+define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %sext.cmp = sext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
+define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %sext.cmp = sext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
+; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
+define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
+ %cmp = icmp sge i32 %a, %b
+ %sext.cmp = sext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38)
+define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
+ %cmp = icmp sle i32 %a, %b
+ %not = xor i1 %cmp, true
+ %zext.cmp = zext i1 %not to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) {
+ %cmp = icmp eq i4 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) {
+ %cmp = icmp eq i8 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16(
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 %a, i16 %b, i32 32)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) {
+ %cmp = icmp eq i16 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) {
+ %cmp = icmp eq i36 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) {
+ %cmp = icmp eq i128 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
+ %cmp = fcmp oeq half %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
+ %cmp = fcmp oeq fp128 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) {
+ %cmp = icmp slt i4 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) {
+ %cmp = icmp slt i8 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 %a, i16 %b, i32 40)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
+ %cmp = icmp slt i16 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) {
+ %cmp = icmp ult i4 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) {
+ %cmp = icmp ult i8 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 %a, i16 %b, i32 36)
+; CHECK-NEXT: ret i64 [[MASK]]
+define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
+ %cmp = icmp ult i16 %a, %b
+ %zext.cmp = zext i1 %cmp to i32
+ %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+ ret i64 %mask
+}
+
+; 1-bit NE comparisons
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
+ %cmp = icmp eq i32 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
+ %cmp = icmp ne i32 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
+ %cmp = icmp sle i32 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
+ %cmp = icmp ugt i64 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
+ %cmp = icmp ugt i64 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 false, i1 %cmp, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
+; CHECK-NEXT: fcmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
+ %cmp = fcmp oeq float %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
+; CHECK-NEXT: fcmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
+ %cmp = fcmp une float %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
+; CHECK-NEXT: fcmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
+ %cmp = fcmp olt double %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
+; CHECK-NEXT: icmp
+; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
+ %cmp = icmp eq i4 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
+; CHECK-NEXT: icmp
+; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
+ %cmp = icmp eq i8 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
+; CHECK-NEXT: icmp
+; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
+ %cmp = icmp eq i16 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
+; CHECK-NEXT: icmp
+; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
+ %cmp = icmp eq i36 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
+ %cmp = icmp eq i128 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
+; CHECK-NEXT: fcmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
+ %cmp = fcmp oeq half %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
+; CHECK-NEXT: fcmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
+;
+ %cmp = fcmp oeq fp128 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
+ %cmp = icmp slt i4 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
+ %cmp = icmp slt i8 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
+ %cmp = icmp slt i16 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
+ %cmp = icmp ult i4 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
+ %cmp = icmp ult i8 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
+; CHECK-NEXT: icmp
+; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
+ %cmp = icmp ult i16 %a, %b
+ %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+ ret i64 %mask
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.fcmp
+; --------------------------------------------------------------------
+
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
+
+; CHECK-LABEL: @invalid_fcmp_code(
+; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
+; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
+define i64 @invalid_fcmp_code(float %a, float %b) {
+ %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
+ %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
+ %or = or i64 %under, %over
+ ret i64 %or
+}
+
+; CHECK-LABEL: @fcmp_constant_inputs_false(
+; CHECK: ret i64 0
+define i64 @fcmp_constant_inputs_false() {
+ %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
+ ret i64 %result
+}
+
+; CHECK-LABEL: @fcmp_constant_inputs_true(
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
+define i64 @fcmp_constant_inputs_true() {
+ %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
+ ret i64 %result
+}
+
+; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
+; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float %x, float 4.000000e+00, i32 2)
+define i64 @fcmp_constant_to_rhs_olt(float %x) {
+ %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
+ ret i64 %result
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wqm.vote
+; --------------------------------------------------------------------
+
+declare i1 @llvm.amdgcn.wqm.vote(i1)
+
+; CHECK-LABEL: @wqm_vote_true(
+; CHECK: ret float 1.000000e+00
+define float @wqm_vote_true() {
+main_body:
+ %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
+ %r = select i1 %w, float 1.0, float 0.0
+ ret float %r
+}
+
+; CHECK-LABEL: @wqm_vote_false(
+; CHECK: ret float 0.000000e+00
+define float @wqm_vote_false() {
+main_body:
+ %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
+ %r = select i1 %w, float 1.0, float 0.0
+ ret float %r
+}
+
+; CHECK-LABEL: @wqm_vote_undef(
+; CHECK: ret float 0.000000e+00
+define float @wqm_vote_undef() {
+main_body:
+ %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef)
+ %r = select i1 %w, float 1.0, float 0.0
+ ret float %r
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.kill
+; --------------------------------------------------------------------
+
+declare void @llvm.amdgcn.kill(i1)
+
+; CHECK-LABEL: @kill_true() {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @kill_true() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.update.dpp.i32
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
+
+; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_no_combine(
+; CHECK: @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
+define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_drop_old(
+; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in2, i32 3, i32 15, i32 15, i1 true)
+define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_undef_old(
+; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 true)
+define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: attributes #5 = { convergent }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg b/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..2a665f06be7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
new file mode 100644
index 00000000000..9efed367d19
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMul() nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulS() nounwind readnone ssp {
+entry:
+ %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulU() nounwind readnone ssp {
+entry:
+ %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+ %b = add <4 x i32> zeroinitializer, %a
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+ %b = add <4 x i32> %x, %a
+ ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
new file mode 100644
index 00000000000..56eee546801
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+ ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #0
diff --git a/llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll b/llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll
new file mode 100644
index 00000000000..2ca6b86ccc2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/constant-fold-hang.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine < %s
+
+; Function Attrs: nounwind readnone ssp
+define void @mulByZero(<4 x i16> %x) #0 {
+entry:
+ %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) #2
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) #1
+
+attributes #0 = { nounwind readnone ssp }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg b/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg
new file mode 100644
index 00000000000..236e1d34416
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
new file mode 100644
index 00000000000..d22fa9c811d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; The alignment arguments for NEON load/store intrinsics can be increased
+; by instcombine. Check for this.
+
+; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32)
+; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16)
+
+@x = common global [8 x i32] zeroinitializer, align 32
+@y = common global [8 x i32] zeroinitializer, align 16
+
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+define void @test() nounwind ssp {
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
+ %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
+ %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
+ call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
+ ret void
+}
+
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/llvm/test/Transforms/InstCombine/ARM/strcmp.ll b/llvm/test/Transforms/InstCombine/ARM/strcmp.ll
new file mode 100644
index 00000000000..571a0f950e0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/strcmp.ll
@@ -0,0 +1,153 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define arm_aapcscc i32 @test1(i8* %str2) {
+; CHECK-LABEL: @test1(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define arm_aapcscc i32 @test2(i8* %str1) {
+; CHECK-LABEL: @test2(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define arm_aapcscc i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+define arm_aapcscc i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define arm_aapcscc i32 @test5(i1 %b) {
+; CHECK-LABEL: @test5(
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+ %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define arm_aapcscc i32 @test6(i8* %str) {
+; CHECK-LABEL: @test6(
+; CHECK: ret i32 0
+
+ %temp1 = call arm_aapcscc i32 @strcmp(i8* %str, i8* %str)
+ ret i32 %temp1
+}
+
+; strcmp("", x) -> -*x
+define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
+; CHECK-LABEL: @test1_vfp(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
+; CHECK-LABEL: @test2_vfp(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define arm_aapcs_vfpcc i32 @test3_vfp() {
+; CHECK-LABEL: @test3_vfp(
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+define arm_aapcs_vfpcc i32 @test4_vfp() {
+; CHECK-LABEL: @test4_vfp(
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
+; CHECK-LABEL: @test5_vfp(
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+ %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define arm_aapcs_vfpcc i32 @test6_vfp(i8* %str) {
+; CHECK-LABEL: @test6_vfp(
+; CHECK: ret i32 0
+
+ %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str, i8* %str)
+ ret i32 %temp1
+}
diff --git a/llvm/test/Transforms/InstCombine/ARM/strcpy.ll b/llvm/test/Transforms/InstCombine/ARM/strcpy.ll
new file mode 100644
index 00000000000..19021219b52
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/strcpy.ll
@@ -0,0 +1,76 @@
+; Test that the strcpy library call simplifier works correctly for ARM procedure calls
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define arm_aapcscc void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+define arm_aapcscc i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
+define arm_aapcscc i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call arm_aapcscc i8* @strcpy
+ ret i8* %ret
+}
+
+define arm_aapcs_vfpcc void @test_simplify1_vfp() {
+; CHECK-LABEL: @test_simplify1_vfp(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+define arm_aapcs_vfpcc i8* @test_simplify2_vfp() {
+; CHECK-LABEL: @test_simplify2_vfp(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
+define arm_aapcs_vfpcc i8* @test_no_simplify1_vfp() {
+; CHECK-LABEL: @test_no_simplify1_vfp(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call arm_aapcs_vfpcc i8* @strcpy
+ ret i8* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/ARM/tbl1.ll b/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
new file mode 100644
index 00000000000..f3cd9102cdf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8-arm-none-eabi"
+
+; Turning a table lookup intrinsic into a shuffle vector instruction
+; can be beneficial. If the mask used for the lookup is the constant
+; vector {7,6,5,4,3,2,1,0}, then the back-end generates rev64
+; instructions instead of a table lookup.
+
+define <8 x i8> @tbl1_8x8(<8 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VEC:%.*]], <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[TMP0]]
+;
+entry:
+ %vtbl1 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %vec, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %vtbl1
+}
+
+; Bail out of the optimization if a mask index is out of range.
+define <8 x i8> @tbl1_8x8_out_of_range(<8 x i8> %vec) {
+; CHECK-LABEL: @tbl1_8x8_out_of_range(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VTBL1:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> [[VEC:%.*]], <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+; CHECK-NEXT: ret <8 x i8> [[VTBL1]]
+;
+entry:
+ %vtbl1 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %vec, <8 x i8> <i8 8, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <8 x i8> %vtbl1
+}
+
+declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/ARM/vld1.ll b/llvm/test/Transforms/InstCombine/ARM/vld1.ll
new file mode 100644
index 00000000000..c87ee04f420
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ARM/vld1.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8-arm-none-eabi"
+
+; Turning a vld1 intrinsic into an llvm load is beneficial
+; when the underlying object being addressed comes from a
+; constant, since we get constant-folding for free.
+
+; Bail out of the optimization if the alignment is not a constant.
+define <2 x i64> @vld1_align(i8* %ptr, i32 %align) {
+; CHECK-LABEL: @vld1_align(
+; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 [[ALIGN:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[VLD1]]
+;
+ %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 %align)
+ ret <2 x i64> %vld1
+}
+
+; Bail out of the optimization if the alignment is not a power of 2.
+define <2 x i64> @vld1_align_pow2(i8* %ptr) {
+; CHECK-LABEL: @vld1_align_pow2(
+; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 3)
+; CHECK-NEXT: ret <2 x i64> [[VLD1]]
+;
+ %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 3)
+ ret <2 x i64> %vld1
+}
+
+define <8 x i8> @vld1_8x8(i8* %ptr) {
+; CHECK-LABEL: @vld1_8x8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT: ret <8 x i8> [[TMP2]]
+;
+ %vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %ptr, i32 1)
+ ret <8 x i8> %vld1
+}
+
+define <4 x i16> @vld1_4x16(i8* %ptr) {
+; CHECK-LABEL: @vld1_4x16(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i16>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: ret <4 x i16> [[TMP2]]
+;
+ %vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* %ptr, i32 2)
+ ret <4 x i16> %vld1
+}
+
+define <2 x i32> @vld1_2x32(i8* %ptr) {
+; CHECK-LABEL: @vld1_2x32(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i32>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* %ptr, i32 4)
+ ret <2 x i32> %vld1
+}
+
+define <1 x i64> @vld1_1x64(i8* %ptr) {
+; CHECK-LABEL: @vld1_1x64(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <1 x i64>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP2]]
+;
+ %vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %ptr, i32 8)
+ ret <1 x i64> %vld1
+}
+
+define <8 x i16> @vld1_8x16(i8* %ptr) {
+; CHECK-LABEL: @vld1_8x16(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i16>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %ptr, i32 2)
+ ret <8 x i16> %vld1
+}
+
+define <16 x i8> @vld1_16x8(i8* %ptr) {
+; CHECK-LABEL: @vld1_16x8(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <16 x i8>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %ptr, i32 1)
+ ret <16 x i8> %vld1
+}
+
+define <4 x i32> @vld1_4x32(i8* %ptr) {
+; CHECK-LABEL: @vld1_4x32(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %ptr, i32 4)
+ ret <4 x i32> %vld1
+}
+
+define <2 x i64> @vld1_2x64(i8* %ptr) {
+; CHECK-LABEL: @vld1_2x64(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i64>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 8)
+ ret <2 x i64> %vld1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32)
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8*, i32)
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32)
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32)
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32)
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32)
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8*, i32)
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32)
diff --git a/llvm/test/Transforms/InstCombine/AddOverFlow.ll b/llvm/test/Transforms/InstCombine/AddOverFlow.ll
new file mode 100644
index 00000000000..13494206f07
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AddOverFlow.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; %a is negative, %b is positive
+define i16 @oppositesign(i16 %x, i16 %y) {
+; CHECK-LABEL: @oppositesign(
+; CHECK-NEXT: [[A:%.*]] = or i16 [[X:%.*]], -32768
+; CHECK-NEXT: [[B:%.*]] = and i16 [[Y:%.*]], 32767
+; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = or i16 %x, 32768
+ %b = and i16 %y, 32767
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @zero_sign_bit(i16 %a) {
+; CHECK-LABEL: @zero_sign_bit(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[A:%.*]], 32767
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i16 [[TMP1]], 512
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %1 = and i16 %a, 32767
+ %2 = add i16 %1, 512
+ ret i16 %2
+}
+
+define i16 @zero_sign_bit2(i16 %a, i16 %b) {
+; CHECK-LABEL: @zero_sign_bit2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[A:%.*]], 32767
+; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[B:%.*]], 32767
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i16 [[TMP3]]
+;
+ %1 = and i16 %a, 32767
+ %2 = and i16 %b, 32767
+ %3 = add i16 %1, %2
+ ret i16 %3
+}
+
+declare i16 @bounded(i16 %input);
+declare i32 @__gxx_personality_v0(...);
+!0 = !{i16 0, i16 32768} ; [0, 32767]
+!1 = !{i16 0, i16 32769} ; [0, 32768]
+
+define i16 @add_bounded_values(i16 %a, i16 %b) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @add_bounded_values(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = call i16 @bounded(i16 [[A:%.*]]), !range !0
+; CHECK-NEXT: [[D:%.*]] = invoke i16 @bounded(i16 [[B:%.*]])
+; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]], !range !0
+; CHECK: cont:
+; CHECK-NEXT: [[E:%.*]] = add nuw i16 [[C]], [[D]]
+; CHECK-NEXT: ret i16 [[E]]
+; CHECK: lpad:
+; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: filter [0 x i8*] zeroinitializer
+; CHECK-NEXT: ret i16 42
+;
+entry:
+ %c = call i16 @bounded(i16 %a), !range !0
+ %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !0
+cont:
+; %c and %d are in [0, 32767]. Therefore, %c + %d doesn't unsigned overflow.
+ %e = add i16 %c, %d
+ ret i16 %e
+lpad:
+ %0 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ ret i16 42
+}
+
+define i16 @add_bounded_values_2(i16 %a, i16 %b) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @add_bounded_values_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = call i16 @bounded(i16 [[A:%.*]]), !range !1
+; CHECK-NEXT: [[D:%.*]] = invoke i16 @bounded(i16 [[B:%.*]])
+; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[LPAD:%.*]], !range !1
+; CHECK: cont:
+; CHECK-NEXT: [[E:%.*]] = add i16 [[C]], [[D]]
+; CHECK-NEXT: ret i16 [[E]]
+; CHECK: lpad:
+; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: filter [0 x i8*] zeroinitializer
+; CHECK-NEXT: ret i16 42
+;
+entry:
+ %c = call i16 @bounded(i16 %a), !range !1
+ %d = invoke i16 @bounded(i16 %b) to label %cont unwind label %lpad, !range !1
+cont:
+; Similar to add_bounded_values, but %c and %d are in [0, 32768]. Therefore,
+; %c + %d may unsigned overflow and we cannot add NUW.
+ %e = add i16 %c, %d
+ ret i16 %e
+lpad:
+ %0 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ ret i16 42
+}
+
+; %a has at most one bit set
+; %b has a 0 bit other than the sign bit
+define i16 @ripple_nsw1(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw1(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], -16385
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 1
+ %b = and i16 %x, 49151
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+define i16 @ripple_nsw2(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw2(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], -16385
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[B]], [[A]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 1
+ %b = and i16 %x, 49151
+ %c = add i16 %b, %a
+ ret i16 %c
+}
+
+define i16 @ripple_nsw3(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw3(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21843
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 43691
+ %b = and i16 %x, 21843
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+define i16 @ripple_nsw4(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw4(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21843
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i16 [[B]], [[A]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 43691
+ %b = and i16 %x, 21843
+ %c = add i16 %b, %a
+ ret i16 %c
+}
+
+define i16 @ripple_nsw5(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw5(
+; CHECK-NEXT: [[A:%.*]] = or i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = or i16 [[X:%.*]], -10923
+; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = or i16 %y, 43691
+ %b = or i16 %x, 54613
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+define i16 @ripple_nsw6(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_nsw6(
+; CHECK-NEXT: [[A:%.*]] = or i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = or i16 [[X:%.*]], -10923
+; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[B]], [[A]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = or i16 %y, 43691
+ %b = or i16 %x, 54613
+ %c = add i16 %b, %a
+ ret i16 %c
+}
+
+; We know nothing about %x
+define i32 @ripple_no_nsw1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ripple_no_nsw1(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = add i32 [[A]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = and i32 %y, 1
+ %b = add i32 %a, %x
+ ret i32 %b
+}
+
+; %a has at most one bit set
+; %b has a 0 bit, but it is the sign bit
+define i16 @ripple_no_nsw2(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_no_nsw2(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 32767
+; CHECK-NEXT: [[C:%.*]] = add nuw i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 1
+ %b = and i16 %x, 32767
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @ripple_no_nsw3(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_no_nsw3(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21845
+; CHECK-NEXT: [[C:%.*]] = add i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 43691
+ %b = and i16 %x, 21845
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+define i16 @ripple_no_nsw4(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_no_nsw4(
+; CHECK-NEXT: [[A:%.*]] = and i16 [[Y:%.*]], -21845
+; CHECK-NEXT: [[B:%.*]] = and i16 [[X:%.*]], 21845
+; CHECK-NEXT: [[C:%.*]] = add i16 [[B]], [[A]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = and i16 %y, 43691
+ %b = and i16 %x, 21845
+ %c = add i16 %b, %a
+ ret i16 %c
+}
+
+define i16 @ripple_no_nsw5(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_no_nsw5(
+; CHECK-NEXT: [[A:%.*]] = or i16 [[Y:%.*]], -21847
+; CHECK-NEXT: [[B:%.*]] = or i16 [[X:%.*]], -10923
+; CHECK-NEXT: [[C:%.*]] = add i16 [[A]], [[B]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = or i16 %y, 43689
+ %b = or i16 %x, 54613
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+; Like the previous test, but flip %a and %b
+define i16 @ripple_no_nsw6(i16 %x, i16 %y) {
+; CHECK-LABEL: @ripple_no_nsw6(
+; CHECK-NEXT: [[A:%.*]] = or i16 [[Y:%.*]], -21847
+; CHECK-NEXT: [[B:%.*]] = or i16 [[X:%.*]], -10923
+; CHECK-NEXT: [[C:%.*]] = add i16 [[B]], [[A]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %a = or i16 %y, 43689
+ %b = or i16 %x, 54613
+ %c = add i16 %b, %a
+ ret i16 %c
+}
diff --git a/llvm/test/Transforms/InstCombine/CPP_min_max.ll b/llvm/test/Transforms/InstCombine/CPP_min_max.ll
new file mode 100644
index 00000000000..04bf0ceefea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/CPP_min_max.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep select | not grep 'i32\*'
+
+; This testcase corresponds to PR362, which observed that the C++ front-end
+; and the LLVM optimizers generate this horrible code, with lots of loads and
+; other unneeded operations.
+;
+; Instcombine should propagate the load through the select instructions to
+; allow elimination of the extra stuff by the mem2reg pass.
+
+define void @_Z5test1RiS_(i32* %x, i32* %y) {
+entry:
+ %tmp.1.i = load i32, i32* %y ; <i32> [#uses=1]
+ %tmp.3.i = load i32, i32* %x ; <i32> [#uses=1]
+ %tmp.4.i = icmp slt i32 %tmp.1.i, %tmp.3.i ; <i1> [#uses=1]
+ %retval.i = select i1 %tmp.4.i, i32* %y, i32* %x ; <i32*> [#uses=1]
+ %tmp.4 = load i32, i32* %retval.i ; <i32> [#uses=1]
+ store i32 %tmp.4, i32* %x
+ ret void
+}
+
+define void @_Z5test2RiS_(i32* %x, i32* %y) {
+entry:
+ %tmp.0 = alloca i32 ; <i32*> [#uses=2]
+ %tmp.2 = load i32, i32* %x ; <i32> [#uses=2]
+ store i32 %tmp.2, i32* %tmp.0
+ %tmp.3.i = load i32, i32* %y ; <i32> [#uses=1]
+ %tmp.4.i = icmp slt i32 %tmp.2, %tmp.3.i ; <i1> [#uses=1]
+ %retval.i = select i1 %tmp.4.i, i32* %y, i32* %tmp.0 ; <i32*> [#uses=1]
+ %tmp.6 = load i32, i32* %retval.i ; <i32> [#uses=1]
+ store i32 %tmp.6, i32* %y
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/ExtractCast.ll b/llvm/test/Transforms/InstCombine/ExtractCast.ll
new file mode 100644
index 00000000000..9a8872f2fb1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ExtractCast.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+; CHECK-LABEL: @a(
+define i32 @a(<4 x i64> %I) {
+entry:
+; CHECK-NOT: trunc <4 x i64>
+ %J = trunc <4 x i64> %I to <4 x i32>
+ %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x i64>
+; CHECK: trunc i64
+; CHECK: ret
+ ret i32 %K
+}
+
+
+; CHECK-LABEL: @b(
+define i32 @b(<4 x float> %I) {
+entry:
+; CHECK-NOT: fptosi <4 x float>
+ %J = fptosi <4 x float> %I to <4 x i32>
+ %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x float>
+; CHECK: fptosi float
+; CHECK: ret
+ ret i32 %K
+}
+
diff --git a/llvm/test/Transforms/InstCombine/IntPtrCast.ll b/llvm/test/Transforms/InstCombine/IntPtrCast.ll
new file mode 100644
index 00000000000..4ecbccd86a4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/IntPtrCast.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:32:32"
+
+define i32* @test(i32* %P) {
+ %V = ptrtoint i32* %P to i32 ; <i32> [#uses=1]
+ %P2 = inttoptr i32 %V to i32* ; <i32*> [#uses=1]
+ ret i32* %P2
+; CHECK: ret i32* %P
+}
+
diff --git a/llvm/test/Transforms/InstCombine/JavaCompare.ll b/llvm/test/Transforms/InstCombine/JavaCompare.ll
new file mode 100644
index 00000000000..8c1f307c79c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/JavaCompare.ll
@@ -0,0 +1,14 @@
+; This is the sequence of stuff that the Java front-end expands for a single
+; <= comparison. Check to make sure we turn it into a <= (only)
+
+; RUN: opt < %s -instcombine -S | grep "icmp sle i32 %A, %B"
+
+define i1 @le(i32 %A, i32 %B) {
+ %c1 = icmp sgt i32 %A, %B ; <i1> [#uses=1]
+ %tmp = select i1 %c1, i32 1, i32 0 ; <i32> [#uses=1]
+ %c2 = icmp slt i32 %A, %B ; <i1> [#uses=1]
+ %result = select i1 %c2, i32 -1, i32 %tmp ; <i32> [#uses=1]
+ %c3 = icmp sle i32 %result, 0 ; <i1> [#uses=1]
+ ret i1 %c3
+}
+
diff --git a/llvm/test/Transforms/InstCombine/LandingPadClauses.ll b/llvm/test/Transforms/InstCombine/LandingPadClauses.ll
new file mode 100644
index 00000000000..75050c91bbb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -0,0 +1,288 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@T1 = external constant i32
+@T2 = external constant i32
+@T3 = external constant i32
+
+declare i32 @generic_personality(i32, i64, i8*, i8*)
+declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+declare i32 @__objc_personality_v0(i32, i64, i8*, i8*)
+declare i32 @__C_specific_handler(...)
+
+declare void @bar()
+
+define void @foo_generic() personality i32 (i32, i64, i8*, i8*)* @generic_personality {
+; CHECK-LABEL: @foo_generic(
+ invoke void @bar()
+ to label %cont.a unwind label %lpad.a
+cont.a:
+ invoke void @bar()
+ to label %cont.b unwind label %lpad.b
+cont.b:
+ invoke void @bar()
+ to label %cont.c unwind label %lpad.c
+cont.c:
+ invoke void @bar()
+ to label %cont.d unwind label %lpad.d
+cont.d:
+ invoke void @bar()
+ to label %cont.e unwind label %lpad.e
+cont.e:
+ invoke void @bar()
+ to label %cont.f unwind label %lpad.f
+cont.f:
+ invoke void @bar()
+ to label %cont.g unwind label %lpad.g
+cont.g:
+ invoke void @bar()
+ to label %cont.h unwind label %lpad.h
+cont.h:
+ invoke void @bar()
+ to label %cont.i unwind label %lpad.i
+cont.i:
+ ret void
+
+lpad.a:
+ %a = landingpad { i8*, i32 }
+ catch i32* @T1
+ catch i32* @T2
+ catch i32* @T1
+ catch i32* @T2
+ unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: @T1
+; CHECK-NEXT: @T2
+; CHECK-NEXT: unreachable
+
+lpad.b:
+ %b = landingpad { i8*, i32 }
+ filter [0 x i32*] zeroinitializer
+ catch i32* @T1
+ unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: filter
+; CHECK-NEXT: unreachable
+
+lpad.c:
+ %c = landingpad { i8*, i32 }
+ catch i32* @T1
+ filter [1 x i32*] [i32* @T1]
+ catch i32* @T2
+ unreachable
+; Caught types should not be removed from filters
+; CHECK: %c = landingpad
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [1 x i32*] [i32* @T1]
+; CHECK-NEXT: catch i32* @T2
+; CHECK-NEXT: unreachable
+
+lpad.d:
+ %d = landingpad { i8*, i32 }
+ filter [3 x i32*] zeroinitializer
+ unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: filter [1 x i32*] zeroinitializer
+; CHECK-NEXT: unreachable
+
+lpad.e:
+ %e = landingpad { i8*, i32 }
+ catch i32* @T1
+ filter [3 x i32*] [i32* @T1, i32* @T2, i32* @T2]
+ unreachable
+; Caught types should not be removed from filters
+; CHECK: %e = landingpad
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [2 x i32*] [i32* @T1, i32* @T2]
+; CHECK-NEXT: unreachable
+
+lpad.f:
+ %f = landingpad { i8*, i32 }
+ filter [2 x i32*] [i32* @T2, i32* @T1]
+ filter [1 x i32*] [i32* @T1]
+ unreachable
+; CHECK: %f = landingpad
+; CHECK-NEXT: filter [1 x i32*] [i32* @T1]
+; CHECK-NEXT: unreachable
+
+lpad.g:
+ %g = landingpad { i8*, i32 }
+ filter [1 x i32*] [i32* @T1]
+ catch i32* @T3
+ filter [2 x i32*] [i32* @T2, i32* @T1]
+ unreachable
+; CHECK: %g = landingpad
+; CHECK-NEXT: filter [1 x i32*] [i32* @T1]
+; CHECK-NEXT: catch i32* @T3
+; CHECK-NEXT: unreachable
+
+lpad.h:
+ %h = landingpad { i8*, i32 }
+ filter [2 x i32*] [i32* @T1, i32* null]
+ filter [1 x i32*] zeroinitializer
+ unreachable
+; CHECK: %h = landingpad
+; CHECK-NEXT: filter [1 x i32*] zeroinitializer
+; CHECK-NEXT: unreachable
+
+lpad.i:
+ %i = landingpad { i8*, i32 }
+ cleanup
+ filter [0 x i32*] zeroinitializer
+ unreachable
+; CHECK: %i = landingpad
+; CHECK-NEXT: filter
+; CHECK-NEXT: unreachable
+}
+
+define void @foo_cxx() personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 {
+; CHECK-LABEL: @foo_cxx(
+ invoke void @bar()
+ to label %cont.a unwind label %lpad.a
+cont.a:
+ invoke void @bar()
+ to label %cont.b unwind label %lpad.b
+cont.b:
+ invoke void @bar()
+ to label %cont.c unwind label %lpad.c
+cont.c:
+ invoke void @bar()
+ to label %cont.d unwind label %lpad.d
+cont.d:
+ ret void
+
+lpad.a:
+ %a = landingpad { i8*, i32 }
+ catch i32* null
+ catch i32* @T1
+ unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+ %b = landingpad { i8*, i32 }
+ filter [1 x i32*] zeroinitializer
+ unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+ %c = landingpad { i8*, i32 }
+ filter [2 x i32*] [i32* @T1, i32* null]
+ unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+ %d = landingpad { i8*, i32 }
+ cleanup
+ catch i32* null
+ unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
+
+define void @foo_objc() personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0 {
+; CHECK-LABEL: @foo_objc(
+ invoke void @bar()
+ to label %cont.a unwind label %lpad.a
+cont.a:
+ invoke void @bar()
+ to label %cont.b unwind label %lpad.b
+cont.b:
+ invoke void @bar()
+ to label %cont.c unwind label %lpad.c
+cont.c:
+ invoke void @bar()
+ to label %cont.d unwind label %lpad.d
+cont.d:
+ ret void
+
+lpad.a:
+ %a = landingpad { i8*, i32 }
+ catch i32* null
+ catch i32* @T1
+ unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+ %b = landingpad { i8*, i32 }
+ filter [1 x i32*] zeroinitializer
+ unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+ %c = landingpad { i8*, i32 }
+ filter [2 x i32*] [i32* @T1, i32* null]
+ unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+ %d = landingpad { i8*, i32 }
+ cleanup
+ catch i32* null
+ unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
+
+define void @foo_seh() personality i32 (...)* @__C_specific_handler {
+; CHECK-LABEL: @foo_seh(
+ invoke void @bar()
+ to label %cont.a unwind label %lpad.a
+cont.a:
+ invoke void @bar()
+ to label %cont.b unwind label %lpad.b
+cont.b:
+ invoke void @bar()
+ to label %cont.c unwind label %lpad.c
+cont.c:
+ invoke void @bar()
+ to label %cont.d unwind label %lpad.d
+cont.d:
+ ret void
+
+lpad.a:
+ %a = landingpad { i8*, i32 }
+ catch i32* null
+ catch i32* @T1
+ unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+ %b = landingpad { i8*, i32 }
+ filter [1 x i32*] zeroinitializer
+ unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+ %c = landingpad { i8*, i32 }
+ filter [2 x i32*] [i32* @T1, i32* null]
+ unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+ %d = landingpad { i8*, i32 }
+ cleanup
+ catch i32* null
+ unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/NVPTX/lit.local.cfg b/llvm/test/Transforms/InstCombine/NVPTX/lit.local.cfg
new file mode 100644
index 00000000000..2cb98eb371b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
new file mode 100644
index 00000000000..cb65b8fdc54
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll
@@ -0,0 +1,471 @@
+; Check that nvvm intrinsics get simplified to target-generic intrinsics where
+; possible.
+;
+; We run this test twice: once with ftz on, and again with ftz off. Behold the
+; hackery:
+
+; RUN: cat %s > %t.ftz
+; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "true" }' >> %t.ftz
+; RUN: opt < %t.ftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ
+
+; RUN: cat %s > %t.noftz
+; RUN: echo 'attributes #0 = { "nvptx-f32ftz" = "false" }' >> %t.noftz
+; RUN: opt < %t.noftz -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ
+
+; We handle nvvm intrinsics with ftz variants as follows:
+; - If the module is in ftz mode, the ftz variant is transformed into the
+; regular llvm intrinsic, and the non-ftz variant is left alone.
+; - If the module is not in ftz mode, it's the reverse: Only the non-ftz
+; variant is transformed, and the ftz variant is left alone.
+
+; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.
+
+; CHECK-LABEL: @ceil_double
+define double @ceil_double(double %a) #0 {
+; CHECK: call double @llvm.ceil.f64
+ %ret = call double @llvm.nvvm.ceil.d(double %a)
+ ret double %ret
+}
+; CHECK-LABEL: @ceil_float
+define float @ceil_float(float %a) #0 {
+; NOFTZ: call float @llvm.ceil.f32
+; FTZ: call float @llvm.nvvm.ceil.f
+ %ret = call float @llvm.nvvm.ceil.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @ceil_float_ftz
+define float @ceil_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
+; FTZ: call float @llvm.ceil.f32
+ %ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
+ ret float %ret
+}
+
+; CHECK-LABEL: @fabs_double
+define double @fabs_double(double %a) #0 {
+; CHECK: call double @llvm.fabs.f64
+ %ret = call double @llvm.nvvm.fabs.d(double %a)
+ ret double %ret
+}
+; CHECK-LABEL: @fabs_float
+define float @fabs_float(float %a) #0 {
+; NOFTZ: call float @llvm.fabs.f32
+; FTZ: call float @llvm.nvvm.fabs.f
+ %ret = call float @llvm.nvvm.fabs.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @fabs_float_ftz
+define float @fabs_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
+; FTZ: call float @llvm.fabs.f32
+ %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
+ ret float %ret
+}
+
+; CHECK-LABEL: @floor_double
+define double @floor_double(double %a) #0 {
+; CHECK: call double @llvm.floor.f64
+ %ret = call double @llvm.nvvm.floor.d(double %a)
+ ret double %ret
+}
+; CHECK-LABEL: @floor_float
+define float @floor_float(float %a) #0 {
+; NOFTZ: call float @llvm.floor.f32
+; FTZ: call float @llvm.nvvm.floor.f
+ %ret = call float @llvm.nvvm.floor.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @floor_float_ftz
+define float @floor_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.floor.ftz.f
+; FTZ: call float @llvm.floor.f32
+ %ret = call float @llvm.nvvm.floor.ftz.f(float %a)
+ ret float %ret
+}
+
+; CHECK-LABEL: @fma_double
+define double @fma_double(double %a, double %b, double %c) #0 {
+; CHECK: call double @llvm.fma.f64
+ %ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
+ ret double %ret
+}
+; CHECK-LABEL: @fma_float
+define float @fma_float(float %a, float %b, float %c) #0 {
+; NOFTZ: call float @llvm.fma.f32
+; FTZ: call float @llvm.nvvm.fma.rn.f
+ %ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
+ ret float %ret
+}
+; CHECK-LABEL: @fma_float_ftz
+define float @fma_float_ftz(float %a, float %b, float %c) #0 {
+; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
+; FTZ: call float @llvm.fma.f32
+ %ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
+ ret float %ret
+}
+
+; CHECK-LABEL: @fmax_double
+define double @fmax_double(double %a, double %b) #0 {
+; CHECK: call double @llvm.maxnum.f64
+ %ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
+ ret double %ret
+}
+; CHECK-LABEL: @fmax_float
+define float @fmax_float(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.maxnum.f32
+; FTZ: call float @llvm.nvvm.fmax.f
+ %ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
+ ret float %ret
+}
+; CHECK-LABEL: @fmax_float_ftz
+define float @fmax_float_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
+; FTZ: call float @llvm.maxnum.f32
+ %ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
+ ret float %ret
+}
+
+; CHECK-LABEL: @fmin_double
+define double @fmin_double(double %a, double %b) #0 {
+; CHECK: call double @llvm.minnum.f64
+ %ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
+ ret double %ret
+}
+; CHECK-LABEL: @fmin_float
+define float @fmin_float(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.minnum.f32
+; FTZ: call float @llvm.nvvm.fmin.f
+ %ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
+ ret float %ret
+}
+; CHECK-LABEL: @fmin_float_ftz
+define float @fmin_float_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
+; FTZ: call float @llvm.minnum.f32
+ %ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
+ ret float %ret
+}
+
+; CHECK-LABEL: @round_double
+define double @round_double(double %a) #0 {
+; CHECK: call double @llvm.round.f64
+ %ret = call double @llvm.nvvm.round.d(double %a)
+ ret double %ret
+}
+; CHECK-LABEL: @round_float
+define float @round_float(float %a) #0 {
+; NOFTZ: call float @llvm.round.f32
+; FTZ: call float @llvm.nvvm.round.f
+ %ret = call float @llvm.nvvm.round.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @round_float_ftz
+define float @round_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.round.ftz.f
+; FTZ: call float @llvm.round.f32
+ %ret = call float @llvm.nvvm.round.ftz.f(float %a)
+ ret float %ret
+}
+
+; CHECK-LABEL: @trunc_double
+define double @trunc_double(double %a) #0 {
+; CHECK: call double @llvm.trunc.f64
+ %ret = call double @llvm.nvvm.trunc.d(double %a)
+ ret double %ret
+}
+; CHECK-LABEL: @trunc_float
+define float @trunc_float(float %a) #0 {
+; NOFTZ: call float @llvm.trunc.f32
+; FTZ: call float @llvm.nvvm.trunc.f
+ %ret = call float @llvm.nvvm.trunc.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @trunc_float_ftz
+define float @trunc_float_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
+; FTZ: call float @llvm.trunc.f32
+ %ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
+ ret float %ret
+}
+
+; Check NVVM intrinsics that correspond to LLVM cast operations.
+
+; CHECK-LABEL: @test_d2i
+define i32 @test_d2i(double %a) #0 {
+; CHECK: fptosi double %a to i32
+ %ret = call i32 @llvm.nvvm.d2i.rz(double %a)
+ ret i32 %ret
+}
+; CHECK-LABEL: @test_f2i
+define i32 @test_f2i(float %a) #0 {
+; CHECK: fptosi float %a to i32
+ %ret = call i32 @llvm.nvvm.f2i.rz(float %a)
+ ret i32 %ret
+}
+; CHECK-LABEL: @test_d2ll
+define i64 @test_d2ll(double %a) #0 {
+; CHECK: fptosi double %a to i64
+ %ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
+ ret i64 %ret
+}
+; CHECK-LABEL: @test_f2ll
+define i64 @test_f2ll(float %a) #0 {
+; CHECK: fptosi float %a to i64
+ %ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
+ ret i64 %ret
+}
+; CHECK-LABEL: @test_d2ui
+define i32 @test_d2ui(double %a) #0 {
+; CHECK: fptoui double %a to i32
+ %ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
+ ret i32 %ret
+}
+; CHECK-LABEL: @test_f2ui
+define i32 @test_f2ui(float %a) #0 {
+; CHECK: fptoui float %a to i32
+ %ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
+ ret i32 %ret
+}
+; CHECK-LABEL: @test_d2ull
+define i64 @test_d2ull(double %a) #0 {
+; CHECK: fptoui double %a to i64
+ %ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
+ ret i64 %ret
+}
+; CHECK-LABEL: @test_f2ull
+define i64 @test_f2ull(float %a) #0 {
+; CHECK: fptoui float %a to i64
+ %ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: @test_i2d
+define double @test_i2d(i32 %a) #0 {
+; CHECK: sitofp i32 %a to double
+ %ret = call double @llvm.nvvm.i2d.rz(i32 %a)
+ ret double %ret
+}
+; CHECK-LABEL: @test_i2f
+define float @test_i2f(i32 %a) #0 {
+; CHECK: sitofp i32 %a to float
+ %ret = call float @llvm.nvvm.i2f.rz(i32 %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_ll2d
+define double @test_ll2d(i64 %a) #0 {
+; CHECK: sitofp i64 %a to double
+ %ret = call double @llvm.nvvm.ll2d.rz(i64 %a)
+ ret double %ret
+}
+; CHECK-LABEL: @test_ll2f
+define float @test_ll2f(i64 %a) #0 {
+; CHECK: sitofp i64 %a to float
+ %ret = call float @llvm.nvvm.ll2f.rz(i64 %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_ui2d
+define double @test_ui2d(i32 %a) #0 {
+; CHECK: uitofp i32 %a to double
+ %ret = call double @llvm.nvvm.ui2d.rz(i32 %a)
+ ret double %ret
+}
+; CHECK-LABEL: @test_ui2f
+define float @test_ui2f(i32 %a) #0 {
+; CHECK: uitofp i32 %a to float
+ %ret = call float @llvm.nvvm.ui2f.rz(i32 %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_ull2d
+define double @test_ull2d(i64 %a) #0 {
+; CHECK: uitofp i64 %a to double
+ %ret = call double @llvm.nvvm.ull2d.rz(i64 %a)
+ ret double %ret
+}
+; CHECK-LABEL: @test_ull2f
+define float @test_ull2f(i64 %a) #0 {
+; CHECK: uitofp i64 %a to float
+ %ret = call float @llvm.nvvm.ull2f.rz(i64 %a)
+ ret float %ret
+}
+
+; Check NVVM intrinsics that map to LLVM binary operations.
+
+; CHECK-LABEL: @test_add_rn_d
+define double @test_add_rn_d(double %a, double %b) #0 {
+; CHECK: fadd
+ %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
+ ret double %ret
+}
+; CHECK-LABEL: @test_add_rn_f
+define float @test_add_rn_f(float %a, float %b) #0 {
+; NOFTZ: fadd
+; FTZ: call float @llvm.nvvm.add.rn.f
+ %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
+ ret float %ret
+}
+; CHECK-LABEL: @test_add_rn_f_ftz
+define float @test_add_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.add.rn.f
+; FTZ: fadd
+ %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
+ ret float %ret
+}
+
+; CHECK-LABEL: @test_mul_rn_d
+define double @test_mul_rn_d(double %a, double %b) #0 {
+; CHECK: fmul
+ %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
+ ret double %ret
+}
+; CHECK-LABEL: @test_mul_rn_f
+define float @test_mul_rn_f(float %a, float %b) #0 {
+; NOFTZ: fmul
+; FTZ: call float @llvm.nvvm.mul.rn.f
+ %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
+ ret float %ret
+}
+; CHECK-LABEL: @test_mul_rn_f_ftz
+define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.mul.rn.f
+; FTZ: fmul
+ %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
+ ret float %ret
+}
+
+; CHECK-LABEL: @test_div_rn_d
+define double @test_div_rn_d(double %a, double %b) #0 {
+; CHECK: fdiv
+ %ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
+ ret double %ret
+}
+; CHECK-LABEL: @test_div_rn_f
+define float @test_div_rn_f(float %a, float %b) #0 {
+; NOFTZ: fdiv
+; FTZ: call float @llvm.nvvm.div.rn.f
+ %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
+ ret float %ret
+}
+; CHECK-LABEL: @test_div_rn_f_ftz
+define float @test_div_rn_f_ftz(float %a, float %b) #0 {
+; NOFTZ: call float @llvm.nvvm.div.rn.f
+; FTZ: fdiv
+ %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
+ ret float %ret
+}
+
+; Check NVVM intrinsics that require us to emit custom IR.
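+; For example, as the rcp checks below show, rcp.rn.f(%a) is expanded into a
+; plain 'fdiv float 1.0, %a' rather than into a single generic intrinsic call.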
+
+; CHECK-LABEL: @test_rcp_rn_f
+define float @test_rcp_rn_f(float %a) #0 {
+; NOFTZ: fdiv float 1.0{{.*}} %a
+; FTZ: call float @llvm.nvvm.rcp.rn.f
+ %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_rcp_rn_f_ftz
+define float @test_rcp_rn_f_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.rcp.rn.f
+; FTZ: fdiv float 1.0{{.*}} %a
+ %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
+ ret float %ret
+}
+
+; CHECK-LABEL: @test_sqrt_rn_d
+define double @test_sqrt_rn_d(double %a) #0 {
+; CHECK: call double @llvm.sqrt.f64(double %a)
+ %ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
+ ret double %ret
+}
+; nvvm.sqrt.f is a special case: it is converted to llvm.sqrt.f32 in both ftz
+; and non-ftz modes.
+; CHECK-LABEL: @test_sqrt_f
+define float @test_sqrt_f(float %a) #0 {
+; CHECK: call float @llvm.sqrt.f32(float %a)
+ %ret = call float @llvm.nvvm.sqrt.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_sqrt_rn_f
+define float @test_sqrt_rn_f(float %a) #0 {
+; NOFTZ: call float @llvm.sqrt.f32(float %a)
+; FTZ: call float @llvm.nvvm.sqrt.rn.f
+ %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
+ ret float %ret
+}
+; CHECK-LABEL: @test_sqrt_rn_f_ftz
+define float @test_sqrt_rn_f_ftz(float %a) #0 {
+; NOFTZ: call float @llvm.nvvm.sqrt.rn.f
+; FTZ: call float @llvm.sqrt.f32(float %a)
+ %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
+ ret float %ret
+}
+
+declare double @llvm.nvvm.add.rn.d(double, double)
+declare float @llvm.nvvm.add.rn.f(float, float)
+declare float @llvm.nvvm.add.rn.ftz.f(float, float)
+declare double @llvm.nvvm.ceil.d(double)
+declare float @llvm.nvvm.ceil.f(float)
+declare float @llvm.nvvm.ceil.ftz.f(float)
+declare float @llvm.nvvm.d2f.rm(double)
+declare float @llvm.nvvm.d2f.rm.ftz(double)
+declare float @llvm.nvvm.d2f.rp(double)
+declare float @llvm.nvvm.d2f.rp.ftz(double)
+declare float @llvm.nvvm.d2f.rz(double)
+declare float @llvm.nvvm.d2f.rz.ftz(double)
+declare i32 @llvm.nvvm.d2i.rz(double)
+declare i64 @llvm.nvvm.d2ll.rz(double)
+declare i32 @llvm.nvvm.d2ui.rz(double)
+declare i64 @llvm.nvvm.d2ull.rz(double)
+declare double @llvm.nvvm.div.rn.d(double, double)
+declare float @llvm.nvvm.div.rn.f(float, float)
+declare float @llvm.nvvm.div.rn.ftz.f(float, float)
+declare i16 @llvm.nvvm.f2h.rz(float)
+declare i16 @llvm.nvvm.f2h.rz.ftz(float)
+declare i32 @llvm.nvvm.f2i.rz(float)
+declare i32 @llvm.nvvm.f2i.rz.ftz(float)
+declare i64 @llvm.nvvm.f2ll.rz(float)
+declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
+declare i32 @llvm.nvvm.f2ui.rz(float)
+declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
+declare i64 @llvm.nvvm.f2ull.rz(float)
+declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
+declare double @llvm.nvvm.fabs.d(double)
+declare float @llvm.nvvm.fabs.f(float)
+declare float @llvm.nvvm.fabs.ftz.f(float)
+declare double @llvm.nvvm.floor.d(double)
+declare float @llvm.nvvm.floor.f(float)
+declare float @llvm.nvvm.floor.ftz.f(float)
+declare double @llvm.nvvm.fma.rn.d(double, double, double)
+declare float @llvm.nvvm.fma.rn.f(float, float, float)
+declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
+declare double @llvm.nvvm.fmax.d(double, double)
+declare float @llvm.nvvm.fmax.f(float, float)
+declare float @llvm.nvvm.fmax.ftz.f(float, float)
+declare double @llvm.nvvm.fmin.d(double, double)
+declare float @llvm.nvvm.fmin.f(float, float)
+declare float @llvm.nvvm.fmin.ftz.f(float, float)
+declare double @llvm.nvvm.i2d.rz(i32)
+declare float @llvm.nvvm.i2f.rz(i32)
+declare double @llvm.nvvm.ll2d.rz(i64)
+declare float @llvm.nvvm.ll2f.rz(i64)
+declare double @llvm.nvvm.lohi.i2d(i32, i32)
+declare double @llvm.nvvm.mul.rn.d(double, double)
+declare float @llvm.nvvm.mul.rn.f(float, float)
+declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
+declare double @llvm.nvvm.rcp.rm.d(double)
+declare double @llvm.nvvm.rcp.rn.d(double)
+declare float @llvm.nvvm.rcp.rn.f(float)
+declare float @llvm.nvvm.rcp.rn.ftz.f(float)
+declare double @llvm.nvvm.round.d(double)
+declare float @llvm.nvvm.round.f(float)
+declare float @llvm.nvvm.round.ftz.f(float)
+declare float @llvm.nvvm.sqrt.f(float)
+declare double @llvm.nvvm.sqrt.rn.d(double)
+declare float @llvm.nvvm.sqrt.rn.f(float)
+declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
+declare double @llvm.nvvm.trunc.d(double)
+declare float @llvm.nvvm.trunc.f(float)
+declare float @llvm.nvvm.trunc.ftz.f(float)
+declare double @llvm.nvvm.ui2d.rz(i32)
+declare float @llvm.nvvm.ui2f.rn(i32)
+declare float @llvm.nvvm.ui2f.rz(i32)
+declare double @llvm.nvvm.ull2d.rz(i64)
+declare float @llvm.nvvm.ull2f.rz(i64)
diff --git a/llvm/test/Transforms/InstCombine/OverlappingInsertvalues.ll b/llvm/test/Transforms/InstCombine/OverlappingInsertvalues.ll
new file mode 100644
index 00000000000..9248aecdf57
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/OverlappingInsertvalues.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that we can find and remove redundant insertvalues
+; CHECK-LABEL: foo_simple
+; CHECK-NOT: i8* %x, 0
+define { i8*, i64, i32 } @foo_simple(i8* %x, i8* %y) nounwind {
+entry:
+ %0 = insertvalue { i8*, i64, i32 } undef, i8* %x, 0
+ %1 = insertvalue { i8*, i64, i32 } %0, i8* %y, 0
+ ret { i8*, i64, i32 } %1
+}
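+; (The first insertvalue in @foo_simple is redundant because the second one
+; overwrites the same index 0 before the aggregate value is used.)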
+; Check that we can find and remove redundant nodes in an insertvalue chain
+; CHECK-LABEL: foo_ovwrt_chain
+; CHECK-NOT: i64 %y, 1
+; CHECK-NOT: i32 555, 2
+define { i8*, i64, i32 } @foo_ovwrt_chain(i8* %x, i64 %y, i64 %z) nounwind {
+entry:
+ %0 = insertvalue { i8*, i64, i32 } undef, i8* %x, 0
+ %1 = insertvalue { i8*, i64, i32 } %0, i64 %y, 1
+ %2 = insertvalue { i8*, i64, i32 } %1, i32 555, 2
+ %3 = insertvalue { i8*, i64, i32 } %2, i64 %z, 1
+ %4 = insertvalue { i8*, i64, i32 } %3, i32 777, 2
+ ret { i8*, i64, i32 } %4
+}
+; Check that we propagate insertvalues only if they are used as the first
+; operand (i.e. as the initial value of the aggregate)
+; CHECK-LABEL: foo_use_as_second_operand
+; CHECK: i16 %x, 0
+; CHECK: %0, 1
+define { i8, {i16, i32} } @foo_use_as_second_operand(i16 %x) nounwind {
+entry:
+ %0 = insertvalue { i16, i32 } undef, i16 %x, 0
+ %1 = insertvalue { i8, {i16, i32} } undef, { i16, i32 } %0, 1
+ ret { i8, {i16, i32} } %1
+}
diff --git a/llvm/test/Transforms/InstCombine/PR30597.ll b/llvm/test/Transforms/InstCombine/PR30597.ll
new file mode 100644
index 00000000000..c0803ed7120
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PR30597.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: readonly uwtable
+define i1 @dot_ref_s(i32** noalias nocapture readonly dereferenceable(8)) {
+entry-block:
+ %loadedptr = load i32*, i32** %0, align 8, !nonnull !0
+ %ptrtoint = ptrtoint i32* %loadedptr to i64
+ %inttoptr = inttoptr i64 %ptrtoint to i32*
+ %switchtmp = icmp eq i32* %inttoptr, null
+ ret i1 %switchtmp
+
+; CHECK-LABEL: @dot_ref_s
+; CHECK-NEXT: entry-block:
+; CHECK-NEXT: ret i1 false
+}
+
+; Function Attrs: readonly uwtable
+define i64* @function(i64* noalias nocapture readonly dereferenceable(8)) {
+entry-block:
+ %loaded = load i64, i64* %0, align 8, !range !1
+ %inttoptr = inttoptr i64 %loaded to i64*
+ ret i64* %inttoptr
+; CHECK-LABEL: @function
+; CHECK: %{{.+}} = load i64*, i64** %{{.+}}, align 8, !nonnull
+}
+
+
+!0 = !{}
+!1 = !{i64 1, i64 140737488355327}
diff --git a/llvm/test/Transforms/InstCombine/PR37526.ll b/llvm/test/Transforms/InstCombine/PR37526.ll
new file mode 100644
index 00000000000..651c25ae044
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PR37526.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define void @PR37526(i32* %pz, i32* %px, i32* %py) {
+; CHECK-LABEL: @PR37526(
+; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[PY:%.*]], align 4
+; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[PX:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[CMP]], i32 [[T3]], i32 [[T2]]
+; CHECK-NEXT: store i32 [[R1]], i32* [[PZ:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %t1 = bitcast i32* %pz to i64*
+ %t2 = load i32, i32* %py
+ %t3 = load i32, i32* %px
+ %cmp = icmp slt i32 %t2, %t3
+ %select = select i1 %cmp, i32* %px, i32* %py
+ %bc = bitcast i32* %select to i64*
+ %r = load i64, i64* %bc
+ store i64 %r, i64* %t1
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll b/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
new file mode 100644
index 00000000000..10b4e4d6263
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+
+define <4 x i32> @test1(<4 x i32>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.altivec.lvx
+; CHECK: ret <4 x i32>
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ %a = add <4 x i32> %v0, %vl
+ ret <4 x i32> %a
+}
+
+define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.altivec.lvx
+; CHECK: ret <4 x i32>
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ %a = add <4 x i32> %v0, %vl
+ ret <4 x i32> %a
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2a
+; CHECK-NOT: @llvm.ppc.altivec.stvx
+; CHECK: ret <4 x i32>
+}
+
+declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
+
+define <4 x i32> @test1l(<4 x i32>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ %a = add <4 x i32> %v0, %vl
+ ret <4 x i32> %a
+}
+
+define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.altivec.lvxl
+; CHECK: ret <4 x i32>
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ %a = add <4 x i32> %v0, %vl
+ ret <4 x i32> %a
+}
+
+declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
+ %hv = bitcast <4 x i32>* %h1 to i8*
+ call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+ ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2la
+; CHECK-NOT: @llvm.ppc.altivec.stvxl
+; CHECK: ret <4 x i32>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll b/llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
new file mode 100644
index 00000000000..e9710df5670
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
@@ -0,0 +1,165 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.ppc.qpx.qvlfs(i8*) #1
+
+define <4 x double> @test1(<4 x float>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1
+; CHECK: @llvm.ppc.qpx.qvlfs
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ %v0e = fpext <4 x float> %v0 to <4 x double>
+ %a = fadd <4 x double> %v0e, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1a(<4 x float>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
+
+; CHECK-LABEL: @test1a
+; CHECK-NOT: @llvm.ppc.qpx.qvlfs
+; CHECK-NOT: load <4 x double>
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ %v0e = fpext <4 x float> %v0 to <4 x double>
+ %a = fadd <4 x double> %v0e, %vl
+ ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfs(<4 x double>, i8*) #0
+
+define <4 x float> @test2(<4 x float>* %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ ret <4 x float> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: @llvm.ppc.qpx.qvstfs
+; CHECK: ret <4 x float>
+}
+
+define <4 x float> @test2a(<4 x float>* align 16 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
+ %hv = bitcast <4 x float>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x float>, <4 x float>* %h, align 8
+ ret <4 x float> %v0
+
+; CHECK-LABEL: @test2a
+; CHECK: fptrunc <4 x double> %d to <4 x float>
+; CHECK-NOT: @llvm.ppc.qpx.qvstfs
+; CHECK-NOT: store <4 x double>
+; CHECK: ret <4 x float>
+}
+
+declare <4 x double> @llvm.ppc.qpx.qvlfd(i8*) #1
+
+define <4 x double> @test1l(<4 x double>* %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1l
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1ln(<4 x double>* align 16 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1ln
+; CHECK: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+define <4 x double> @test1la(<4 x double>* align 32 %h) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
+
+; CHECK-LABEL: @test1la
+; CHECK-NOT: @llvm.ppc.qpx.qvlfd
+; CHECK: ret <4 x double>
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ %a = fadd <4 x double> %v0, %vl
+ ret <4 x double> %a
+}
+
+declare void @llvm.ppc.qpx.qvstfd(<4 x double>, i8*) #0
+
+define <4 x double> @test2l(<4 x double>* %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2l
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2ln(<4 x double>* align 16 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2ln
+; CHECK: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+define <4 x double> @test2la(<4 x double>* align 32 %h, <4 x double> %d) #0 {
+entry:
+ %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
+ %hv = bitcast <4 x double>* %h1 to i8*
+ call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
+
+ %v0 = load <4 x double>, <4 x double>* %h, align 8
+ ret <4 x double> %v0
+
+; CHECK-LABEL: @test2la
+; CHECK-NOT: @llvm.ppc.qpx.qvstfd
+; CHECK: ret <4 x double>
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg b/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000..5d33887ff0a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if 'PowerPC' not in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll b/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
new file mode 100644
index 00000000000..ad264fb15b3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
@@ -0,0 +1,44 @@
+; Verify that we can create unaligned loads and stores from VSX intrinsics.
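+; As the checks below show, the lxvw4x/stxvw4x and lxvd2x/stxvd2x calls are
+; replaced with ordinary IR loads and stores that use 'align 1'.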
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = common global <4 x float> zeroinitializer, align 1
+@res_vf = common global <4 x float> zeroinitializer, align 1
+@vd = common global <2 x double> zeroinitializer, align 1
+@res_vd = common global <2 x double> zeroinitializer, align 1
+
+define void @test1() {
+entry:
+ %t1 = alloca <4 x float>*, align 8
+ %t2 = alloca <2 x double>*, align 8
+ store <4 x float>* @vf, <4 x float>** %t1, align 8
+ %0 = load <4 x float>*, <4 x float>** %t1, align 8
+ %1 = bitcast <4 x float>* %0 to i8*
+ %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
+ store <4 x float>* @res_vf, <4 x float>** %t1, align 8
+ %3 = load <4 x float>*, <4 x float>** %t1, align 8
+ %4 = bitcast <4 x float>* %3 to i8*
+ call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
+ store <2 x double>* @vd, <2 x double>** %t2, align 8
+ %5 = load <2 x double>*, <2 x double>** %t2, align 8
+ %6 = bitcast <2 x double>* %5 to i8*
+ %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
+ store <2 x double>* @res_vd, <2 x double>** %t2, align 8
+ %8 = load <2 x double>*, <2 x double>** %t2, align 8
+ %9 = bitcast <2 x double>* %8 to i8*
+ call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+ ret void
+}
+
+; CHECK-LABEL: @test1
+; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
+; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
+; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
diff --git a/llvm/test/Transforms/InstCombine/README.txt b/llvm/test/Transforms/InstCombine/README.txt
new file mode 100644
index 00000000000..de043c77489
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/README.txt
@@ -0,0 +1,4 @@
+This directory contains test cases for the instcombine transformation. The
+dated tests are actual bug tests, whereas the named tests are used to test
+for features that this pass should be capable of performing.
+
diff --git a/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
new file mode 100644
index 00000000000..fc1034a8d48
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
@@ -0,0 +1,210 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b).
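+; The fold is done only when the fsub carries the 'ninf' flag; the _safe
+; variant below, which lacks 'ninf', is intentionally left unfolded.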
+
+define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD128_safe(
+; CHECK-NEXT: [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.safe = fsub <2 x double> %a, %b
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.safe, <2 x double> zeroinitializer, i32 5)
+ %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD128(
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i = fsub ninf <2 x double> %a, %b
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> zeroinitializer, i32 5)
+ %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_foldingPD128_undef_elt(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD128_undef_elt(
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i = fsub ninf <2 x double> %a, %b
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %sub.i, <2 x double> <double 0.0, double undef>, i32 5)
+ %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD256(
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i1 = fsub ninf <4 x double> %a, %b
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
+ %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
+; CHECK-LABEL: @sub_compare_foldingPD512(
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
+; CHECK-NEXT: ret i8 [[T1]]
+;
+ %sub.i2 = fsub ninf <8 x double> %a, %b
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i32 4)
+ %t1 = bitcast <8 x i1> %t0 to i8
+ ret i8 %t1
+}
+
+define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS128(
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i3 = fsub ninf <4 x float> %a, %b
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12)
+ %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS256(
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
+; CHECK-NEXT: ret i8 [[T1]]
+;
+ %sub.i4 = fsub ninf <8 x float> %a, %b
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5)
+ %t1 = bitcast <8 x i1> %t0 to i8
+ ret i8 %t1
+}
+
+define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
+; CHECK-LABEL: @sub_compare_foldingPS512(
+; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
+; CHECK-NEXT: ret i16 [[T1]]
+;
+ %sub.i5 = fsub ninf <16 x float> %a, %b
+ %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i32 4)
+ %t1 = bitcast <16 x i1> %t0 to i16
+ ret i16 %t1
+}
+
+define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD128(
+; CHECK-NEXT: [[T0:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x i1> [[T0]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i = fsub ninf <2 x double> %a, %b
+ %t0 = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5)
+ %t1 = shufflevector <2 x i1> %t0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD256(
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i = fsub ninf <4 x double> %a, %b
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5)
+ %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
+; CHECK-NEXT: [[TMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> undef, <4 x double> zeroinitializer, i32 5)
+; CHECK-NEXT: [[T0:%.*]] = shufflevector <4 x i1> [[TMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
+; CHECK-NEXT: ret i8 [[T1]]
+;
+ %sub.i1 = fsub ninf <4 x double> undef, undef
+ %tmp = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5)
+ %t0 = shufflevector <4 x i1> %tmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t1 = bitcast <8 x i1> %t0 to i8
+ ret i8 %t1
+}
+
+define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPD512(
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
+; CHECK-NEXT: ret i8 [[T1]]
+;
+ %sub.i = fsub ninf <8 x double> %a, %b
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i32 4)
+ %t1 = bitcast <8 x i1> %t0 to i8
+ ret i8 %t1
+}
+
+define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS128(
+; CHECK-NEXT: [[T0:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12)
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <4 x i1> [[T0]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <8 x i1> [[T1]] to i8
+; CHECK-NEXT: ret i8 [[T2]]
+;
+ %sub.i = fsub ninf <4 x float> %a, %b
+ %t0 = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12)
+ %t1 = shufflevector <4 x i1> %t0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t2 = bitcast <8 x i1> %t1 to i8
+ ret i8 %t2
+}
+
+define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS256(
+; CHECK-NEXT: [[T0:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i1> [[T0]] to i8
+; CHECK-NEXT: ret i8 [[T1]]
+;
+ %sub.i = fsub ninf <8 x float> %a, %b
+ %t0 = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5)
+ %t1 = bitcast <8 x i1> %t0 to i8
+ ret i8 %t1
+}
+
+define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
+; CHECK-LABEL: @sub_compare_folding_swapPS512(
+; CHECK-NEXT: [[T0:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i32 4)
+; CHECK-NEXT: [[T1:%.*]] = bitcast <16 x i1> [[T0]] to i16
+; CHECK-NEXT: ret i16 [[T1]]
+;
+ %sub.i = fsub ninf <16 x float> %a, %b
+ %t0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i32 4)
+ %t1 = bitcast <16 x i1> %t0 to i16
+ ret i16 %t1
+}
+
+declare <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double>, <2 x double>, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double>, <4 x double>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double>, <8 x double>, i32, i32)
+declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
+declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
+declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)
diff --git a/llvm/test/Transforms/InstCombine/X86/addcarry.ll b/llvm/test/Transforms/InstCombine/X86/addcarry.ll
new file mode 100644
index 00000000000..d762b4e96da
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/addcarry.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
+declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)
+
+define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
+; CHECK-LABEL: @no_carryin_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %s = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %x, i32 %y)
+ %ov = extractvalue { i8, i32 } %s, 0
+ store i8 %ov, i8* %p
+ %r = extractvalue { i8, i32 } %s, 1
+ ret i32 %r
+}
+
+define i64 @no_carryin_i64(i64 %x, i64 %y, i8* %p) {
+; CHECK-LABEL: @no_carryin_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %x, i64 %y)
+ %ov = extractvalue { i8, i64 } %s, 0
+ store i8 %ov, i8* %p
+ %r = extractvalue { i8, i64 } %s, 1
+ ret i64 %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
new file mode 100644
index 00000000000..864e2b9aa67
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
+
+define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[AB:%.*]], <2 x double> [[XY:%.*]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
+ ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_zero(
+; CHECK-NEXT: ret <2 x double> [[XY:%.*]]
+;
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
+ ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_dup(
+; CHECK-NEXT: ret <2 x double> [[XY:%.*]]
+;
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
+ ret <2 x double> %1
+}
+
+define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[XYZW:%.*]], <4 x float> [[ABCD:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
+ ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_zero(
+; CHECK-NEXT: ret <4 x float> [[XYZW:%.*]]
+;
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
+ ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_dup(
+; CHECK-NEXT: ret <4 x float> [[XYZW:%.*]]
+;
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
+ ret <4 x float> %1
+}
+
+define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[XYZW:%.*]], <16 x i8> [[ABCD:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 21, i32 22, i32 7, i32 8, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_zero(
+; CHECK-NEXT: ret <16 x i8> [[XYZW:%.*]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_dup(
+; CHECK-NEXT: ret <16 x i8> [[XYZW:%.*]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
+ ret <16 x i8> %1
+}
+
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[AB:%.*]], <4 x double> [[XY:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
+ ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_zero(
+; CHECK-NEXT: ret <4 x double> [[XY:%.*]]
+;
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_avx_dup(
+; CHECK-NEXT: ret <4 x double> [[XY:%.*]]
+;
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
+ ret <4 x double> %1
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[XYZW:%.*]], <8 x float> [[ABCD:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 4, i32 5, i32 6, i32 15>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_zero(
+; CHECK-NEXT: ret <8 x float> [[XYZW:%.*]]
+;
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
+ ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_avx_dup(
+; CHECK-NEXT: ret <8 x float> [[XYZW:%.*]]
+;
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
+ ret <8 x float> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[XYZW:%.*]], <32 x i8> [[ABCD:%.*]], <32 x i32> <i32 0, i32 1, i32 34, i32 3, i32 36, i32 37, i32 38, i32 7, i32 8, i32 9, i32 42, i32 11, i32 44, i32 45, i32 46, i32 15, i32 16, i32 17, i32 50, i32 19, i32 52, i32 53, i32 54, i32 23, i32 24, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
+ <32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
+ i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_zero(
+; CHECK-NEXT: ret <32 x i8> [[XYZW:%.*]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_avx2_dup(
+; CHECK-NEXT: ret <32 x i8> [[XYZW:%.*]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
+ ret <32 x i8> %1
+}
+
+define <4 x float> @sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i1> %cond) {
+; CHECK-LABEL: @sel_v4f32(
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]]
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %s = sext <4 x i1> %cond to <4 x i32>
+ %b = bitcast <4 x i32> %s to <4 x float>
+ %r = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %x, <4 x float> %y, <4 x float> %b)
+ ret <4 x float> %r
+}
+
+define <2 x double> @sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i1> %cond) {
+; CHECK-LABEL: @sel_v2f64(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[X:%.*]]
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %s = sext <2 x i1> %cond to <2 x i64>
+ %b = bitcast <2 x i64> %s to <2 x double>
+ %r = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %x, <2 x double> %y, <2 x double> %b)
+ ret <2 x double> %r
+}
+
+; Bitcast X, Y, and the select and remove the intrinsic.
+
+define <16 x i8> @sel_v4i32(<16 x i8> %x, <16 x i8> %y, <4 x i1> %cond) {
+; CHECK-LABEL: @sel_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+; CHECK-NEXT: ret <16 x i8> [[R]]
+;
+ %s = sext <4 x i1> %cond to <4 x i32>
+ %b = bitcast <4 x i32> %s to <16 x i8>
+ %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b)
+ ret <16 x i8> %r
+}
+
+define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
+; CHECK-LABEL: @sel_v16i8(
+; CHECK-NEXT: [[R:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i8> [[Y:%.*]], <16 x i8> [[X:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[R]]
+;
+ %s = sext <16 x i1> %cond to <16 x i8>
+ %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %s)
+ ret <16 x i8> %r
+}
+
+; PR38814: https://bugs.llvm.org/show_bug.cgi?id=38814
+; Repeat the tests above using the minimal form that we expect when using C intrinsics in code.
+; This verifies that nothing is interfering with the blend transform. This also tests the
+; expected IR when 1 of the blend operands is a constant 0 vector. Potentially, this could
+; be transformed to bitwise logic in IR, but currently that transform is left to the backend.
+
+define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: @sel_v4f32_sse_reality(
+; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %ld = load <4 x float>, <4 x float>* %x, align 16
+ %cmp = fcmp olt <4 x float> %z, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cond = bitcast <4 x i32> %sext to <4 x float>
+ %r = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %ld, <4 x float> zeroinitializer, <4 x float> %cond)
+ ret <4 x float> %r
+}
+
+define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: @sel_v2f64_sse_reality(
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %ld = load <2 x double>, <2 x double>* %x, align 16
+ %cmp = fcmp olt <2 x double> %z, %y
+ %sext = sext <2 x i1> %cmp to <2 x i64>
+ %cond = bitcast <2 x i64> %sext to <2 x double>
+ %r = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %ld, <2 x double> zeroinitializer, <2 x double> %cond)
+ ret <2 x double> %r
+}
+
+; Bitcast the inputs and the result and remove the intrinsic.
+
+define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: @sel_v4i32_sse_reality(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[LD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
+; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> [[LD1]]
+; CHECK-NEXT: [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[RCAST]]
+;
+ %xcast = bitcast <2 x i64>* %x to <16 x i8>*
+ %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
+ %ycast = bitcast <2 x i64> %y to <4 x i32>
+ %zcast = bitcast <2 x i64> %z to <4 x i32>
+ %cmp = icmp sgt <4 x i32> %ycast, %zcast
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cond = bitcast <4 x i32> %sext to <16 x i8>
+ %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %cond)
+ %rcast = bitcast <16 x i8> %r to <2 x i64>
+ ret <2 x i64> %rcast
+}
+
+define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: @sel_v16i8_sse_reality(
+; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
+; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
+; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
+; CHECK-NEXT: [[R:%.*]] = select <16 x i1> [[CMP]], <16 x i8> zeroinitializer, <16 x i8> [[LD]]
+; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[RCAST]]
+;
+ %xcast = bitcast <2 x i64>* %x to <16 x i8>*
+ %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
+ %ycast = bitcast <2 x i64> %y to <16 x i8>
+ %zcast = bitcast <2 x i64> %z to <16 x i8>
+ %cmp = icmp sgt <16 x i8> %ycast, %zcast
+ %sext = sext <16 x i1> %cmp to <16 x i8>
+ %r = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %ld, <16 x i8> zeroinitializer, <16 x i8> %sext)
+ %rcast = bitcast <16 x i8> %r to <2 x i64>
+ ret <2 x i64> %rcast
+}
+
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
+
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
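+
+; Editor's note (not part of the original test file): a minimal sketch of the
+; rewrite the tests above exercise. Blendv only reads the sign bit of each mask
+; element, so when the mask is the sign-extension of a vector compare the
+; intrinsic reduces to a plain IR select on that compare:
+;
+;   %cmp  = fcmp olt <4 x float> %z, %y
+;   %sext = sext <4 x i1> %cmp to <4 x i32>
+;   %mask = bitcast <4 x i32> %sext to <4 x float>
+;   %r    = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a, <4 x float> %b, <4 x float> %mask)
+;     ==>
+;   %r    = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a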
+
diff --git a/llvm/test/Transforms/InstCombine/X86/clmulqdq.ll b/llvm/test/Transforms/InstCombine/X86/clmulqdq.ll
new file mode 100644
index 00000000000..12429e16909
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/clmulqdq.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8)
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8)
+
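+; Editor's note (not part of the original test file): these demanded-elements
+; tests rely on how PCLMULQDQ decodes its immediate. Bit 0 selects which 64-bit
+; element of the first operand is multiplied and bit 4 selects the element of
+; the second operand (per 128-bit lane for the 256/512-bit variants). The
+; unselected element of each operand is never read, so inserts into it can be
+; dropped, and an undef in a selected element lets the whole call constant-fold
+; (to zero in the tests below). Worked decoding of the immediates used here:
+;
+;   imm8 = 0  (0b00000000): uses a0[0] and a1[0]
+;   imm8 = 1  (0b00000001): uses a0[1] and a1[0]
+;   imm8 = 16 (0b00010000): uses a0[0] and a1[1]
+;   imm8 = 17 (0b00010001): uses a0[1] and a1[1]
+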
+define <2 x i64> @test_demanded_elts_pclmulqdq_0(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_0(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+ %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 0)
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_1(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_1(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+ %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 1)
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_16(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_16(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> <i64 undef, i64 1>, i8 16)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+ %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 16)
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_17(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_17(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 undef, i64 1>, i8 17)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <2 x i64> %a1, i64 1, i64 1
+ %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 17)
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_0(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 undef, i64 1>, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_1(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 undef, i64 1>, i8 1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_16(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 undef, i64 1>, <2 x i64> <i64 1, i64 undef>, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_demanded_elts_pclmulqdq_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_undef_17(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 1, i64 undef>, i8 17)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_0(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_0(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 0)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_1(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_1(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_16(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_16(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> [[A0:%.*]], <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 16)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_17(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_17(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %1 = insertelement <4 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <4 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <4 x i64> %1, i64 1, i64 3
+ %4 = insertelement <4 x i64> %2, i64 1, i64 3
+ %res = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %3, <4 x i64> %4, i8 17)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_1(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_16(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 undef, i64 1, i64 undef, i64 1>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_demanded_elts_pclmulqdq_256_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_256_undef_17(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, <4 x i64> <i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_0(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_0(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 0)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_1(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_1(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> [[A1:%.*]], i8 1)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_16(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_16(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> [[A0:%.*]], <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 16)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 16)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_17(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_17(
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 17)
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %1 = insertelement <8 x i64> %a0, i64 1, i64 1
+ %2 = insertelement <8 x i64> %a1, i64 1, i64 1
+ %3 = insertelement <8 x i64> %1, i64 1, i64 3
+ %4 = insertelement <8 x i64> %2, i64 1, i64 3
+ %5 = insertelement <8 x i64> %3, i64 1, i64 5
+ %6 = insertelement <8 x i64> %4, i64 1, i64 5
+ %7 = insertelement <8 x i64> %5, i64 1, i64 7
+ %8 = insertelement <8 x i64> %6, i64 1, i64 7
+ %res = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %7, <8 x i64> %8, i8 17)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_0() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_0(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 0)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_1() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_1(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, i8 1)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_16() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_16(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 16)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_demanded_elts_pclmulqdq_512_undef_17() {
+; CHECK-LABEL: @test_demanded_elts_pclmulqdq_512_undef_17(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, <8 x i64> <i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef, i64 1, i64 undef>, i8 17)
+ ret <8 x i64> %1
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/lit.local.cfg b/llvm/test/Transforms/InstCombine/X86/lit.local.cfg
new file mode 100644
index 00000000000..c8625f4d9d2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'X86' not in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/InstCombine/X86/pr2645-1.ll b/llvm/test/Transforms/InstCombine/X86/pr2645-1.ll
new file mode 100644
index 00000000000..2986d21866b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/pr2645-1.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | grep shufflevector
+; PR2645
+
+; instcombine shouldn't delete the shufflevector.
+
+define internal void @""(i8*, i32, i8*) {
+; <label>:3
+ br label %4
+
+; <label>:4 ; preds = %6, %3
+ %.0 = phi i32 [ 0, %3 ], [ %19, %6 ] ; <i32> [#uses=4]
+ %5 = icmp slt i32 %.0, %1 ; <i1> [#uses=1]
+ br i1 %5, label %6, label %20
+
+; <label>:6 ; preds = %4
+ %7 = getelementptr i8, i8* %2, i32 %.0 ; <i8*> [#uses=1]
+ %8 = bitcast i8* %7 to <4 x i16>* ; <<4 x i16>*> [#uses=1]
+ %9 = load <4 x i16>, <4 x i16>* %8, align 1 ; <<4 x i16>> [#uses=1]
+ %10 = bitcast <4 x i16> %9 to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %11 = call <2 x i64> @foo(<1 x i64> %10)
+; <<2 x i64>> [#uses=1]
+ %12 = bitcast <2 x i64> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %13 = bitcast <4 x i32> %12 to <8 x i16> ; <<8 x i16>> [#uses=2]
+ %14 = shufflevector <8 x i16> %13, <8 x i16> %13, <8 x i32> < i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3 > ; <<8 x i16>> [#uses=1]
+ %15 = bitcast <8 x i16> %14 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %16 = sitofp <4 x i32> %15 to <4 x float> ; <<4 x float>> [#uses=1]
+ %17 = getelementptr i8, i8* %0, i32 %.0 ; <i8*> [#uses=1]
+ %18 = bitcast i8* %17 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ store <4 x float> %16, <4 x float>* %18, align 1
+ %19 = add i32 %.0, 1 ; <i32> [#uses=1]
+ br label %4
+
+; <label>:20 ; preds = %4
+ call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+declare <2 x i64> @foo(<1 x i64>)
+declare void @llvm.x86.mmx.emms( )
diff --git a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll
new file mode 100644
index 00000000000..d95c42da5f7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: shufflevector{{.*}}i32 8
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+ %struct.ActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
+ %struct.AlphaTest = type { float, i16, i8, i8 }
+ %struct.ArrayRange = type { i8, i8, i8, i8 }
+ %struct.BlendMode = type { i16, i16, i16, i16, %struct.IColor4, i16, i16, i8, i8, i8, i8 }
+ %struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
+ %struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
+ %struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
+ %struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
+ %struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
+ %struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
+ %struct.FixedFunction = type { %struct.PPStreamToken* }
+ %struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
+ %struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
+ %struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
+ %struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
+ %struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
+ %struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
+ %struct.LightModel = type { %struct.IColor4, [8 x %struct.Light], [2 x %struct.Material], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
+ %struct.LightProduct = type { %struct.IColor4, %struct.IColor4, %struct.IColor4 }
+ %struct.LineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
+ %struct.LogicOp = type { i16, i8, i8 }
+ %struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
+ %struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
+ %struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
+ %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
+ %struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
+ %struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
+ %struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
+ %struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
+ %struct.PixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
+ %struct.PluginBufferData = type { i32 }
+ %struct.PointLineLimits = type { float, float, float }
+ %struct.PointMode = type { float, float, float, float, %struct.PointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
+ %struct.PolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.ProgramLimits = type { i32, i32, i32, i32 }
+ %struct.RegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.IColor4], [8 x %struct.RegisterCombinersPerStageState], %struct.RegisterCombinersFinalStageState }
+ %struct.RegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.RegisterCombinersPerVariableState] }
+ %struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
+ %struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
+ %struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
+ %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
+ %struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
+ %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
+ %struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
+ %struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
+ %struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
+ %struct.TextureImageMode = type { float }
+ %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
+ %struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
+ %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
+ %struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
+ %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
+ %struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
+ %struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
+ %struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
+ %struct.IColor4 = type { float, float, float, float }
+ %struct.TCoord2 = type { float, float }
+ %struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 }
+ %struct.VMTextures = type { [16 x %struct.TextureRec*] }
+ %struct.PPStreamToken = type { { i16, i16, i32 } }
+ %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
+
+define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk) nounwind {
+bb266.i:
+ getelementptr <4 x float>, <4 x float>* null, i32 11 ; <<4 x float>*>:0 [#uses=1]
+ load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1]
+ shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1]
+ shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1]
+ shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1]
+ shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>>:5 [#uses=1]
+ fmul <4 x float> %5, zeroinitializer ; <<4 x float>>:6 [#uses=2]
+ fmul <4 x float> %6, %6 ; <<4 x float>>:7 [#uses=1]
+ fadd <4 x float> zeroinitializer, %7 ; <<4 x float>>:8 [#uses=1]
+ call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone ; <<4 x float>>:9 [#uses=1]
+ %phitmp40 = bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 > ; <<4 x i32>> [#uses=1]
+ %tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 > ; <<4 x i32>> [#uses=1]
+ %tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float> ; <<4 x float>> [#uses=1]
+ fadd <4 x float> %tmp4117.i, zeroinitializer ; <<4 x float>>:10 [#uses=1]
+ fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:11 [#uses=1]
+ call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:12 [#uses=1]
+ call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:13 [#uses=1]
+ %tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind ; <<4 x float>> [#uses=1]
+ bitcast <4 x float> %tmp4170.i to <16 x i8> ; <<16 x i8>>:14 [#uses=1]
+ call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %14 ) nounwind readnone ; <i32>:15 [#uses=1]
+ icmp eq i32 %15, 0 ; <i1>:16 [#uses=1]
+ br i1 %16, label %bb5574.i, label %bb4521.i
+
+bb4521.i: ; preds = %bb266.i
+ unreachable
+
+bb5574.i: ; preds = %bb266.i
+ unreachable
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx.ll
new file mode 100644
index 00000000000..bad27d1e0c4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32)
+declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32)
+
+define <8 x float> @test_round_ps_floor(<8 x float> %a) {
+; CHECK-LABEL: @test_round_ps_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A:%.*]])
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 1)
+ ret <8 x float> %1
+}
+
+define <8 x float> @test_round_ps_ceil(<8 x float> %a) {
+; CHECK-LABEL: @test_round_ps_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A:%.*]])
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a, i32 2)
+ ret <8 x float> %1
+}
+
+define <4 x double> @test_round_pd_floor(<4 x double> %a) {
+; CHECK-LABEL: @test_round_pd_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[A:%.*]])
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 1)
+ ret <4 x double> %1
+}
+
+define <4 x double> @test_round_pd_ceil(<4 x double> %a) {
+; CHECK-LABEL: @test_round_pd_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[A:%.*]])
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a, i32 2)
+ ret <4 x double> %1
+}
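+
+; Editor's note (not part of the original test file): the immediates above are
+; assumed to follow the SSE4.1/AVX rounding-control encoding, where bits 1:0
+; select the mode when bit 2 is clear:
+;
+;   0 -> round to nearest
+;   1 -> round toward -inf   (folds to @llvm.floor.*)
+;   2 -> round toward +inf   (folds to @llvm.ceil.*)
+;   3 -> round toward zero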
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll
new file mode 100644
index 00000000000..f4045f788e2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @identity_test_vpermd(
+; CHECK-NEXT: ret <8 x i32> %a0
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @identity_test_vpermps(
+; CHECK-NEXT: ret <8 x float> %a0
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %a
+}
+
+; Instcombine should be able to fold the following shuffle into a shufflevector
+; instruction with an all-zeroes mask.
+
+define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @zero_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @zero_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ ret <8 x float> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles.
+
+define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @shuffle_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
+
+define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @undef_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @undef_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+; Verify simplification of demanded vector elements.
+
+define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermd(
+; CHECK-NEXT: ret <8 x i32> %a0
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0
+ %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
+ %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %3
+}
+
+define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1)
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
+ %1 = insertelement <8 x i32> %a1, i32 0, i32 7
+ %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)
+ %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %3
+}
+
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
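+
+; Editor's note (not part of the original test file): a minimal sketch of the
+; fold these tests exercise, assuming the usual VPERMD/VPERMPS semantics where
+; only the low 3 bits of each 32-bit index are used. A constant index vector
+; therefore maps directly onto a shufflevector mask:
+;
+;   %a = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+;     ==>
+;   %a = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>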
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
new file mode 100644
index 00000000000..9c5080c7792
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
@@ -0,0 +1,3532 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_add_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_add_ss_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_ss_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
+ ret <4 x float> %4
+}
+
+define float @test_add_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
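+; Editor's note (not part of the original test file): in these scalar AVX-512
+; tests the final i32 argument is the rounding/SAE control. A value of 4 is
+; assumed to mean "current rounding direction" (no special behaviour
+; requested), which is why those calls scalarize to a plain fadd/fsub/fmul/fdiv
+; on element 0 plus a mask select, while a value of 8 is assumed to request an
+; explicit exception/rounding override, so the intrinsic call is left intact.
+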
+declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_add_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_add_sd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_sd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
+ ret <2 x double> %2
+}
+
+define double @test_add_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_sub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_sub_ss_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_ss_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
+ ret <4 x float> %4
+}
+
+define float @test_sub_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_sub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_sub_sd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_sd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
+ ret <2 x double> %2
+}
+
+define double @test_sub_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_mul_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_mul_ss_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_ss_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
+ ret <4 x float> %4
+}
+
+define float @test_mul_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_mul_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_mul_sd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_sd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
+ ret <2 x double> %2
+}
+
+define double @test_mul_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
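+; Division follows the same pattern: default rounding (i32 4) should fold to a
+; scalar fdiv, while static rounding (i32 8) leaves the intrinsic call in place.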
+declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_div_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_div_ss_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_ss_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
+ ret <4 x float> %4
+}
+
+define float @test_div_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_div_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_div_sd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_sd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
+ ret <2 x double> %2
+}
+
+define double @test_div_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
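+; max/min have no plain IR equivalent, so the intrinsic calls are kept; these
+; tests only check that the constant insertions into the unused upper lanes are
+; stripped, since just element 0 of each source is demanded.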
+declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_max_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define float @test_max_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_max_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define double @test_max_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_min_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %4
+}
+
+define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_min_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
+ ret <4 x float> %4
+}
+
+define float @test_min_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
+ %10 = extractelement <4 x float> %9, i32 1
+ ret float %10
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_min_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
+ ret <2 x double> %2
+}
+
+define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_min_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
+ ret <2 x double> %2
+}
+
+define double @test_min_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
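+; The scalar compares also read only element 0 of each operand, so the
+; upper-lane constant insertions should be removed while the call itself is
+; preserved.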
+declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
+
+define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) {
+; CHECK-LABEL: @test_cmp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %3, <4 x float> %6, i32 3, i8 %mask, i32 4)
+ ret i8 %7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
+
+define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
+; CHECK-LABEL: @test_cmp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %1, <2 x double> %2, i32 3, i8 %mask, i32 4)
+ ret i8 %3
+}
+
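+; The scalar conversion intrinsics read only element 0, so the insertions of
+; zero into lanes 1-3 (or lane 1 for the double forms) are expected to be
+; dropped from each argument.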
+define i64 @test(float %f, double %d) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4)
+; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4)
+; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4)
+; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4)
+; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4)
+; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4)
+; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4)
+; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT: ret i64 [[TMP15]]
+;
+ %v00 = insertelement <4 x float> undef, float %f, i32 0
+ %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+ %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+ %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+ %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %v03, i32 4)
+ %v10 = insertelement <4 x float> undef, float %f, i32 0
+ %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+ %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+ %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+ %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %v13, i32 4)
+ %v20 = insertelement <4 x float> undef, float %f, i32 0
+ %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+ %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+ %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+ %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v23, i32 4)
+ %v30 = insertelement <4 x float> undef, float %f, i32 0
+ %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+ %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+ %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+ %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %v33, i32 4)
+ %v40 = insertelement <2 x double> undef, double %d, i32 0
+ %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+ %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %v41, i32 4)
+ %v50 = insertelement <2 x double> undef, double %d, i32 0
+ %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+ %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %v51, i32 4)
+ %v60 = insertelement <2 x double> undef, double %d, i32 0
+ %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+ %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %v61, i32 4)
+ %v70 = insertelement <2 x double> undef, double %d, i32 0
+ %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+ %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %v71, i32 4)
+ %tmp8 = add i32 %tmp0, %tmp2
+ %tmp9 = add i32 %tmp4, %tmp6
+ %tmp10 = add i32 %tmp8, %tmp9
+ %tmp11 = sext i32 %tmp10 to i64
+ %tmp12 = add i64 %tmp1, %tmp3
+ %tmp13 = add i64 %tmp5, %tmp7
+ %tmp14 = add i64 %tmp12, %tmp13
+ %tmp15 = add i64 %tmp11, %tmp14
+ ret i64 %tmp15
+}
+
+declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32)
+declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32)
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32)
+declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32)
+declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32)
+declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32)
+declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32)
+declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32)
+
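+; The unsigned conversion intrinsics below are expected to be simplified the
+; same way.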
+define i64 @test2(float %f, double %d) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4)
+; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4)
+; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4)
+; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V33]], i32 4)
+; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4)
+; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4)
+; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4)
+; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT: ret i64 [[TMP15]]
+;
+ %v00 = insertelement <4 x float> undef, float %f, i32 0
+ %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+ %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+ %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+ %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %v03, i32 4)
+ %v10 = insertelement <4 x float> undef, float %f, i32 0
+ %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+ %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+ %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+ %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %v13, i32 4)
+ %v20 = insertelement <4 x float> undef, float %f, i32 0
+ %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+ %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+ %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+ %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %v23, i32 4)
+ %v30 = insertelement <4 x float> undef, float %f, i32 0
+ %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+ %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+ %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+ %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %v33, i32 4)
+ %v40 = insertelement <2 x double> undef, double %d, i32 0
+ %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+ %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %v41, i32 4)
+ %v50 = insertelement <2 x double> undef, double %d, i32 0
+ %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+ %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %v51, i32 4)
+ %v60 = insertelement <2 x double> undef, double %d, i32 0
+ %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+ %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %v61, i32 4)
+ %v70 = insertelement <2 x double> undef, double %d, i32 0
+ %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+ %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %v71, i32 4)
+ %tmp8 = add i32 %tmp0, %tmp2
+ %tmp9 = add i32 %tmp4, %tmp6
+ %tmp10 = add i32 %tmp8, %tmp9
+ %tmp11 = sext i32 %tmp10 to i64
+ %tmp12 = add i64 %tmp1, %tmp3
+ %tmp13 = add i64 %tmp5, %tmp7
+ %tmp14 = add i64 %tmp12, %tmp13
+ %tmp15 = add i64 %tmp11, %tmp14
+ ret i64 %tmp15
+}
+
+declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32)
+declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32)
+declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32)
+declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32)
+declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32)
+declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32)
+declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
+declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
+
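+; For rndscale, an immediate of 1 selects round-down and 2 selects round-up,
+; so these calls should lower to llvm.floor / llvm.ceil plus the appropriate
+; mask select.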
+declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
+declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
+declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
+declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
+declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
+declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+
+define <4 x float> @test_rndscale_ss_floor(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ss_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.floor.f32(float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP7]]
+;
+ %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 1, i32 4)
+ ret <4 x float> %1
+}
+
+define <4 x float> @test_rndscale_ss_ceil(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ss_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[SRC1:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.ceil.f32(float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[DST:%.*]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], float [[TMP5]], float [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[SRC0:%.*]], float [[TMP6]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP7]]
+;
+ %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %src0, <4 x float> %src1, <4 x float> %dst, i8 %k, i32 2, i32 4)
+ ret <4 x float> %1
+}
+
+define <2 x double> @test_rndscale_sd_floor(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_sd_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.floor.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP7]]
+;
+ %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 1, i32 4)
+ ret <2 x double> %1
+}
+
+define <2 x double> @test_rndscale_sd_ceil(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_sd_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[K:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[SRC1:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.ceil.f64(double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[DST:%.*]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP2]], double [[TMP5]], double [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[SRC0:%.*]], double [[TMP6]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP7]]
+;
+ %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %src0, <2 x double> %src1, <2 x double> %dst, i8 %k, i32 2, i32 4)
+ ret <2 x double> %1
+}
+
+define <4 x float> @test_rndscale_ps_128_floor(<4 x float> %src, <4 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ps_128_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 1, <4 x float> %dst, i8 %k)
+ ret <4 x float> %1
+}
+
+define <4 x float> @test_rndscale_ps_128_ceil(<4 x float> %src, <4 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ps_128_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP1]], <4 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
+;
+ %1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %src, i32 2, <4 x float> %dst, i8 %k)
+ ret <4 x float> %1
+}
+
+define <8 x float> @test_rndscale_ps_256_floor(<8 x float> %src, <8 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ps_256_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 1, <8 x float> %dst, i8 %k)
+ ret <8 x float> %1
+}
+
+define <8 x float> @test_rndscale_ps_256_ceil(<8 x float> %src, <8 x float> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_ps_256_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %src, i32 2, <8 x float> %dst, i8 %k)
+ ret <8 x float> %1
+}
+
+define <16 x float> @test_rndscale_ps_512_floor(<16 x float> %src, <16 x float> %dst, i16 %k) {
+; CHECK-LABEL: @test_rndscale_ps_512_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.floor.v16f32(<16 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 1, <16 x float> %dst, i16 %k, i32 4)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_rndscale_ps_512_ceil(<16 x float> %src, <16 x float> %dst, i16 %k) {
+; CHECK-LABEL: @test_rndscale_ps_512_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.ceil.v16f32(<16 x float> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[K:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[DST:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %src, i32 2, <16 x float> %dst, i16 %k, i32 4)
+ ret <16 x float> %1
+}
+
+define <2 x double> @test_rndscale_pd_128_floor(<2 x double> %src, <2 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_128_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 1, <2 x double> %dst, i8 %k)
+ ret <2 x double> %1
+}
+
+define <2 x double> @test_rndscale_pd_128_ceil(<2 x double> %src, <2 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_128_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP1]], <2 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %src, i32 2, <2 x double> %dst, i8 %k)
+ ret <2 x double> %1
+}
+
+define <4 x double> @test_rndscale_pd_256_floor(<4 x double> %src, <4 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_256_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP4]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 1, <4 x double> %dst, i8 %k)
+ ret <4 x double> %1
+}
+
+define <4 x double> @test_rndscale_pd_256_ceil(<4 x double> %src, <4 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_256_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP1]], <4 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP4]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %src, i32 2, <4 x double> %dst, i8 %k)
+ ret <4 x double> %1
+}
+
+define <8 x double> @test_rndscale_pd_512_floor(<8 x double> %src, <8 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_512_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.floor.v8f64(<8 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 1, <8 x double> %dst, i8 %k, i32 4)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_rndscale_pd_512_ceil(<8 x double> %src, <8 x double> %dst, i8 %k) {
+; CHECK-LABEL: @test_rndscale_pd_512_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.ceil.v8f64(<8 x double> [[SRC:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[K:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[DST:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %src, i32 2, <8 x double> %dst, i8 %k, i32 4)
+ ret <8 x double> %1
+}
+
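+; The masked scalar FMA tests build llvm.fma.f32/f64 from lane 0 of each
+; source; constant insertions into the other lanes should be removed, the *_0
+; variants extract lane 0 of the result so the final insertelement folds away,
+; and the *_1 variants extract lane 1 (a known constant) so they fold to that
+; constant.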
+declare float @llvm.fma.f32(float, float, float) #1
+
+define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = extractelement <4 x float> %a, i64 0
+ %8 = extractelement <4 x float> %3, i64 0
+ %9 = extractelement <4 x float> %6, i64 0
+ %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
+ %11 = bitcast i8 %mask to <8 x i1>
+ %12 = extractelement <8 x i1> %11, i64 0
+ %13 = select i1 %12, float %10, float %7
+ %14 = insertelement <4 x float> %a, float %13, i64 0
+ ret <4 x float> %14
+}
+
+define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
+; CHECK-NEXT: ret float [[TMP7]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float %4
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 0
+ ret float %12
+}
+
+define float @test_mask_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float %4
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 1
+ ret float %12
+}
+
+declare double @llvm.fma.f64(double, double, double) #1
+
+define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
+ %3 = extractelement <2 x double> %a, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = bitcast i8 %mask to <8 x i1>
+ %8 = extractelement <8 x i1> %7, i64 0
+ %9 = select i1 %8, double %6, double %3
+ %10 = insertelement <2 x double> %a, double %9, i64 0
+ ret <2 x double> %10
+}
+
+define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
+; CHECK-NEXT: ret double [[TMP7]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double %2
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 0
+ ret double %10
+}
+
+define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask_vfmadd_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double %2
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 1
+ ret double %10
+}
+
+define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = extractelement <4 x float> %a, i64 0
+ %8 = extractelement <4 x float> %3, i64 0
+ %9 = extractelement <4 x float> %6, i64 0
+ %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
+ %11 = bitcast i8 %mask to <8 x i1>
+ %12 = extractelement <8 x i1> %11, i64 0
+ %13 = select i1 %12, float %10, float 0.000000e+00
+ %14 = insertelement <4 x float> %a, float %13, i64 0
+ ret <4 x float> %14
+}
+
+define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
+; CHECK-NEXT: ret float [[TMP7]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float 0.000000e+00
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 0
+ ret float %12
+}
+
+define float @test_maskz_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float 0.000000e+00
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 1
+ ret float %12
+}
+
+define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
+ %3 = extractelement <2 x double> %a, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = bitcast i8 %mask to <8 x i1>
+ %8 = extractelement <8 x i1> %7, i64 0
+ %9 = select i1 %8, double %6, double 0.000000e+00
+ %10 = insertelement <2 x double> %a, double %9, i64 0
+ ret <2 x double> %10
+}
+
+define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
+; CHECK-NEXT: ret double [[TMP7]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double 0.000000e+00
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 0
+ ret double %10
+}
+
+define double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_maskz_vfmadd_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double 0.000000e+00
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 1
+ ret double %10
+}
+
+define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP8]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = extractelement <4 x float> %3, i64 0
+ %8 = extractelement <4 x float> %6, i64 0
+ %9 = extractelement <4 x float> %c, i64 0
+ %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
+ %11 = bitcast i8 %mask to <8 x i1>
+ %12 = extractelement <8 x i1> %11, i64 0
+ %13 = select i1 %12, float %10, float %9
+ %14 = insertelement <4 x float> %c, float %13, i64 0
+ ret <4 x float> %14
+}
+
+define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
+; CHECK-NEXT: ret float [[TMP7]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %a, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %3, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float %6
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 0
+ ret float %12
+}
+
+define float @test_mask3_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %a, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %3, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float %6
+ %11 = insertelement <4 x float> %3, float %10, i64 0
+ %12 = extractelement <4 x float> %11, i32 1
+ ret float %12
+}
+
+define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP8]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = extractelement <2 x double> %1, i64 0
+ %4 = extractelement <2 x double> %2, i64 0
+ %5 = extractelement <2 x double> %c, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = bitcast i8 %mask to <8 x i1>
+ %8 = extractelement <8 x i1> %7, i64 0
+ %9 = select i1 %8, double %6, double %5
+ %10 = insertelement <2 x double> %c, double %9, i64 0
+ ret <2 x double> %10
+}
+
+define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
+; CHECK-NEXT: ret double [[TMP7]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %a, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double %4
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 0
+ ret double %10
+}
+
+define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmadd_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %a, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = bitcast i8 %mask to <8 x i1>
+ %7 = extractelement <8 x i1> %6, i64 0
+ %8 = select i1 %7, double %5, double %4
+ %9 = insertelement <2 x double> %1, double %8, i64 0
+ %10 = extractelement <2 x double> %9, i32 1
+ ret double %10
+}
+
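+; In the vfmsub variants the addend is negated with a whole-vector
+; 'fsub -0.0, %c'; since only lane 0 is used, the negation is expected to be
+; scalarized to a single scalar fsub of the extracted element.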
+define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP10]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
+ %8 = extractelement <4 x float> %3, i64 0
+ %9 = extractelement <4 x float> %6, i64 0
+ %10 = extractelement <4 x float> %7, i64 0
+ %11 = call float @llvm.fma.f32(float %8, float %9, float %10)
+ %12 = extractelement <4 x float> %c, i64 0
+ %13 = bitcast i8 %mask to <8 x i1>
+ %14 = extractelement <8 x i1> %13, i64 0
+ %15 = select i1 %14, float %11, float %12
+ %16 = insertelement <4 x float> %c, float %15, i64 0
+ ret <4 x float> %16
+}
+
+define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
+; CHECK-NEXT: ret float [[TMP9]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
+ %5 = extractelement <4 x float> %a, i64 0
+ %6 = extractelement <4 x float> %b, i64 0
+ %7 = extractelement <4 x float> %4, i64 0
+ %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
+ %9 = extractelement <4 x float> %3, i64 0
+ %10 = bitcast i8 %mask to <8 x i1>
+ %11 = extractelement <8 x i1> %10, i64 0
+ %12 = select i1 %11, float %8, float %9
+ %13 = insertelement <4 x float> %3, float %12, i64 0
+ %14 = extractelement <4 x float> %13, i32 0
+ ret float %14
+}
+
+define float @test_mask3_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
+ %5 = extractelement <4 x float> %a, i64 0
+ %6 = extractelement <4 x float> %b, i64 0
+ %7 = extractelement <4 x float> %4, i64 0
+ %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
+ %9 = extractelement <4 x float> %3, i64 0
+ %10 = bitcast i8 %mask to <8 x i1>
+ %11 = extractelement <8 x i1> %10, i64 0
+ %12 = select i1 %11, float %8, float %9
+ %13 = insertelement <4 x float> %3, float %12, i64 0
+ %14 = extractelement <4 x float> %13, i32 1
+ ret float %14
+}
+
+define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP10]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = extractelement <2 x double> %3, i64 0
+ %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
+ %8 = extractelement <2 x double> %c, i64 0
+ %9 = bitcast i8 %mask to <8 x i1>
+ %10 = extractelement <8 x i1> %9, i64 0
+ %11 = select i1 %10, double %7, double %8
+ %12 = insertelement <2 x double> %c, double %11, i64 0
+ ret <2 x double> %12
+}
+
+define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: ret double [[TMP9]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
+ %3 = extractelement <2 x double> %a, i64 0
+ %4 = extractelement <2 x double> %b, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = extractelement <2 x double> %1, i64 0
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, double %6, double %7
+ %11 = insertelement <2 x double> %1, double %10, i64 0
+ %12 = extractelement <2 x double> %11, i32 0
+ ret double %12
+}
+
+define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfmsub_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
+ %3 = extractelement <2 x double> %a, i64 0
+ %4 = extractelement <2 x double> %b, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = extractelement <2 x double> %1, i64 0
+ %8 = bitcast i8 %mask to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, double %6, double %7
+ %11 = insertelement <2 x double> %1, double %10, i64 0
+ %12 = extractelement <2 x double> %11, i32 1
+ ret double %12
+}
+
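+; The vfnmsub variants negate both the first multiplicand and the addend; both
+; vector negations should likewise be reduced to scalar fsubs of lane 0.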
+define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP11]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
+ %8 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
+ %9 = extractelement <4 x float> %7, i64 0
+ %10 = extractelement <4 x float> %6, i64 0
+ %11 = extractelement <4 x float> %8, i64 0
+ %12 = call float @llvm.fma.f32(float %9, float %10, float %11)
+ %13 = extractelement <4 x float> %c, i64 0
+ %14 = bitcast i8 %mask to <8 x i1>
+ %15 = extractelement <8 x i1> %14, i64 0
+ %16 = select i1 %15, float %12, float %13
+ %17 = insertelement <4 x float> %c, float %16, i64 0
+ ret <4 x float> %17
+}
+
+define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
+; CHECK-NEXT: ret float [[TMP10]]
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
+ %6 = extractelement <4 x float> %4, i64 0
+ %7 = extractelement <4 x float> %b, i64 0
+ %8 = extractelement <4 x float> %5, i64 0
+ %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
+ %10 = extractelement <4 x float> %3, i64 0
+ %11 = bitcast i8 %mask to <8 x i1>
+ %12 = extractelement <8 x i1> %11, i64 0
+ %13 = select i1 %12, float %9, float %10
+ %14 = insertelement <4 x float> %3, float %13, i64 0
+ %15 = extractelement <4 x float> %14, i32 0
+ ret float %15
+}
+
+define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
+ %6 = extractelement <4 x float> %4, i64 0
+ %7 = extractelement <4 x float> %b, i64 0
+ %8 = extractelement <4 x float> %5, i64 0
+ %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
+ %10 = extractelement <4 x float> %3, i64 0
+ %11 = bitcast i8 %mask to <8 x i1>
+ %12 = extractelement <8 x i1> %11, i64 0
+ %13 = select i1 %12, float %9, float %10
+ %14 = insertelement <4 x float> %3, float %13, i64 0
+ %15 = extractelement <4 x float> %14, i32 1
+ ret float %15
+}
+
+define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP11]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
+ %4 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
+ %5 = extractelement <2 x double> %3, i64 0
+ %6 = extractelement <2 x double> %2, i64 0
+ %7 = extractelement <2 x double> %4, i64 0
+ %8 = call double @llvm.fma.f64(double %5, double %6, double %7)
+ %9 = extractelement <2 x double> %c, i64 0
+ %10 = bitcast i8 %mask to <8 x i1>
+ %11 = extractelement <8 x i1> %10, i64 0
+ %12 = select i1 %11, double %8, double %9
+ %13 = insertelement <2 x double> %c, double %12, i64 0
+ ret <2 x double> %13
+}
+
+define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
+; CHECK-NEXT: ret double [[TMP10]]
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
+ %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
+ %4 = extractelement <2 x double> %2, i64 0
+ %5 = extractelement <2 x double> %b, i64 0
+ %6 = extractelement <2 x double> %3, i64 0
+ %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
+ %8 = extractelement <2 x double> %1, i64 0
+ %9 = bitcast i8 %mask to <8 x i1>
+ %10 = extractelement <8 x i1> %9, i64 0
+ %11 = select i1 %10, double %7, double %8
+ %12 = insertelement <2 x double> %1, double %11, i64 0
+ %13 = extractelement <2 x double> %12, i32 0
+ ret double %13
+}
+
+define double @test_mask3_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mask3_vfnmsub_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
+ %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
+ %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
+ %4 = extractelement <2 x double> %2, i64 0
+ %5 = extractelement <2 x double> %b, i64 0
+ %6 = extractelement <2 x double> %3, i64 0
+ %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
+ %8 = extractelement <2 x double> %1, i64 0
+ %9 = bitcast i8 %mask to <8 x i1>
+ %10 = extractelement <8 x i1> %9, i64 0
+ %11 = select i1 %10, double %7, double %8
+ %12 = insertelement <2 x double> %1, double %11, i64 0
+ %13 = extractelement <2 x double> %12, i32 1
+ ret double %13
+}
+
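+; Variable-permute intrinsics with constant index vectors: an identity index
+; vector folds to the input, an all-zero index vector to a splat
+; shufflevector, and any other constant index vector to a plain shufflevector,
+; with undef indices propagated. The *_mask variants keep the mask select on
+; the folded result.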
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
+; CHECK-LABEL: @identity_test_permvar_si_256(
+; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_si_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i32> [[TMP2]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
+; CHECK-LABEL: @zero_test_permvar_si_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_si_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_si_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_si_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
+; CHECK-LABEL: @undef_test_permvar_si_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_si_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
+;
+ %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
+ ret <8 x i32> %3
+}
+
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
+
+define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @identity_test_permvar_sf_256(
+; CHECK-NEXT: ret <8 x float> [[A0:%.*]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @zero_test_permvar_sf_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ ret <8 x float> %1
+}
+
+define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @undef_test_permvar_sf_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %1
+}
+
+define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
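+; For the 256-bit permutes with 64-bit elements only 4 of the 8 mask bits are
+; meaningful, so the i8 mask is bitcast to <8 x i1> and its low 4 bits are
+; extracted with a shufflevector before the select.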
+declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @identity_test_permvar_di_256(
+; CHECK-NEXT: ret <4 x i64> [[A0:%.*]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP2]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @zero_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @undef_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
+
+define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @identity_test_permvar_df_256(
+; CHECK-NEXT: ret <4 x double> [[A0:%.*]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ ret <4 x double> %1
+}
+
+define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP2]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @zero_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ ret <4 x double> %1
+}
+
+define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @undef_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ ret <4 x double> %1
+}
+
+define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
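+; The 512-bit permutes with 32-bit elements use a full 16-bit mask, bitcast to
+; <16 x i1> for the select.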
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
+
+define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @identity_test_permvar_si_512(
+; CHECK-NEXT: ret <16 x i32> [[A0:%.*]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP2]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @zero_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @undef_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
+
+define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @identity_test_permvar_sf_512(
+; CHECK-NEXT: ret <16 x float> [[A0:%.*]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ ret <16 x float> %1
+}
+
+define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @zero_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ ret <16 x float> %1
+}
+
+define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %1
+}
+
+define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @undef_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %1
+}
+
+define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
+
+define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @identity_test_permvar_di_512(
+; CHECK-NEXT: ret <8 x i64> [[A0:%.*]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP2]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @zero_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @undef_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
+
+define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @identity_test_permvar_df_512(
+; CHECK-NEXT: ret <8 x double> [[A0:%.*]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ ret <8 x double> %1
+}
+
+define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @zero_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ ret <8 x double> %1
+}
+
+define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x double> %1
+}
+
+define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @undef_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ ret <8 x double> %1
+}
+
+define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_128(
+; CHECK-NEXT: ret <8 x i16> [[A0:%.*]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_256(
+; CHECK-NEXT: ret <16 x i16> [[A0:%.*]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP2]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
+
+define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_512(
+; CHECK-NEXT: ret <32 x i16> [[A0:%.*]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP2]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_128(
+; CHECK-NEXT: ret <16 x i8> [[A0:%.*]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_256(
+; CHECK-NEXT: ret <32 x i8> [[A0:%.*]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP2]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
+
+define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_512(
+; CHECK-NEXT: ret <64 x i8> [[A0:%.*]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP2]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
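+; As the following add/sub/mul/div tests show, a rounding argument of 4 (CUR_DIRECTION) lets the 512-bit arithmetic intrinsic fold to a plain fadd/fsub/fmul/fdiv, while any other rounding argument (8 here) keeps the intrinsic call intact.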
+declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_add_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_add_ps_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_add_ps_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_add_ps_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_add_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_add_pd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_pd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_add_pd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_sub_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_sub_ps_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_sub_ps_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_sub_ps_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_sub_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_sub_pd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_pd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_sub_pd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_mul_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_mul_ps_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_mul_ps_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_mul_ps_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_mul_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_mul_pd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_pd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_mul_pd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
+
+define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_div_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: @test_div_ps_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_div_ps_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: @test_div_ps_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
+ ret <16 x float> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_div_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
+; CHECK-LABEL: @test_div_pd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_pd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: @test_div_pd_mask_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
+ ret <8 x double> %3
+}
+
+declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
+
+define i32 @test_comi_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comi_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4)
+ ret i32 %9
+}
+
+declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
+
+define i32 @test_comi_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comi_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4)
+ ret i32 %5
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
new file mode 100644
index 00000000000..2b472cad2da
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-bmi-tbm.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @llvm.x86.tbm.bextri.u32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.tbm.bextri.u64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
+declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
+declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+
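+; The BEXTR/BEXTRI control operand encodes the extraction in its low 16 bits:
+; bits 7:0 give the starting bit position and bits 15:8 give the number of bits
+; to extract (e.g. 1296 = 0x0510 extracts 5 bits starting at bit 16), so a zero
+; length or an out-of-range start folds to 0. BZHI keeps the bits below the
+; index held in the low 8 bits of its second operand and zeroes the rest.
+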
+define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.tbm.bextri.u32(i32 [[A:%.*]], i32 1296)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1296)
+ ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_zero_length(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_zero_length(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 1)
+ ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_large_shift(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_large_shift(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a, i32 288)
+ ret i32 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.tbm.bextri.u64(i64 [[A:%.*]], i64 1312)
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1312)
+ ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_zero_length(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_zero_length(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 1)
+ ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_large_shift(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_large_shift(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 320)
+ ret i64 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold(
+; CHECK-NEXT: ret i32 57005
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold2(
+; CHECK-NEXT: ret i32 233495534
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i32 @test_x86_tbm_bextri_u32_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u32_constfold3(
+; CHECK-NEXT: ret i32 233495534
+;
+ %1 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold(
+; CHECK-NEXT: ret i64 57005
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold2(
+; CHECK-NEXT: ret i64 233495534
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i64 @test_x86_tbm_bextri_u64_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_tbm_bextri_u64_constfold3(
+; CHECK-NEXT: ret i64 233495534
+;
+ %1 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i32 @test_x86_bmi_bextri_32(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bextr.32(i32 [[A:%.*]], i32 1296)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1296)
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_zero_length(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_zero_length(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 1)
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_large_shift(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_large_shift(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a, i32 288)
+ ret i32 %1
+}
+
+define i64 @test_x86_bmi_bextri_64(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bextr.64(i64 [[A:%.*]], i64 1312)
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1312)
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_zero_length(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_zero_length(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 1)
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_large_shift(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_large_shift(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a, i64 320)
+ ret i64 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold(
+; CHECK-NEXT: ret i32 57005
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 4112) ; extract bits 31:16 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold2(
+; CHECK-NEXT: ret i32 233495534
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 8196) ; extract bits 35:4 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bextri_32_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_32_constfold3(
+; CHECK-NEXT: ret i32 233495534
+;
+ %1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 3735928559, i32 16388) ; extract bits 67:4 from 0xDEADBEEF
+ ret i32 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold(
+; CHECK-NEXT: ret i64 57005
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 4112) ; extract bits 31:16 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold2() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold2(
+; CHECK-NEXT: ret i64 233495534
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 16388) ; extract bits 67:4 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bextri_64_constfold3() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bextri_64_constfold3(
+; CHECK-NEXT: ret i64 233495534
+;
+ %1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 3735928559, i64 32772) ; extract bits 131:4 from 0xDEADBEEF
+ ret i64 %1
+}
+
+define i32 @test_x86_bmi_bzhi_32(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_32(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.bmi.bzhi.32(i32 [[A:%.*]], i32 31)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 31)
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bzhi_32_zero(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_32_zero(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 0)
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bzhi_32_max(i32 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_32_max(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 32)
+ ret i32 %1
+}
+
+define i32 @test_x86_bmi_bzhi_32_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_32_constfold(
+; CHECK-NEXT: ret i32 1
+;
+ %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 5, i32 1)
+ ret i32 %1
+}
+
+define i64 @test_x86_bmi_bzhi_64(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_64(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.bmi.bzhi.64(i64 [[A:%.*]], i64 63)
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 63)
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bzhi_64_zero(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_64_zero(
+; CHECK-NEXT: ret i64 0
+;
+ %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 0)
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bzhi_64_max(i64 %a) nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_64_max(
+; CHECK-NEXT: ret i64 [[A:%.*]]
+;
+ %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 64)
+ ret i64 %1
+}
+
+define i64 @test_x86_bmi_bzhi_64_constfold() nounwind readnone {
+; CHECK-LABEL: @test_x86_bmi_bzhi_64_constfold(
+; CHECK-NEXT: ret i64 1
+;
+ %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
+ ret i64 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll b/llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll
new file mode 100644
index 00000000000..878b97d1bb2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; crc32 with a 64-bit destination zeros the high 32 bits, so masking the result
+; with 0xffffffff below is redundant.
+; rdar://9467055
+
+define i64 @test() nounwind {
+entry:
+; CHECK: test
+; CHECK: tail call i64 @llvm.x86.sse42.crc32.64.64
+; CHECK-NOT: and
+; CHECK: ret
+ %0 = tail call i64 @llvm.x86.sse42.crc32.64.64(i64 0, i64 4) nounwind
+ %1 = and i64 %0, 4294967295
+ ret i64 %1
+}
+
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll b/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll
new file mode 100644
index 00000000000..6b5b6cb26ed
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
+
+;
+; Vector Demanded Bits
+;
+
+; Only bottom 4 elements required.
+define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_128(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
+ ret <4 x float> %2
+}
+
+; All 8 elements required.
+define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+ ret <8 x float> %2
+}
+
+;
+; Constant Folding
+;
+
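+; The i16 elements are IEEE half-precision values: 0x0000 = 0.0, 0x3800 = 0.5,
+; 0x3C00 = 1.0, 0x8000 = -0.0, 0x4000 = 2.0, 0x7BFF = 65504.0, 0xBC00 = -1.0,
+; 0xC000 = -2.0. The 128-bit form converts only the low 4 elements.
+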
+define <4 x float> @fold_vcvtph2ps_128() {
+; CHECK-LABEL: @fold_vcvtph2ps_128(
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+;
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256() {
+; CHECK-LABEL: @fold_vcvtph2ps_256(
+; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+;
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <8 x float> %1
+}
+
+define <4 x float> @fold_vcvtph2ps_128_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
+; CHECK-NEXT: ret <8 x float> zeroinitializer
+;
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <8 x float> %1
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-fma.ll b/llvm/test/Transforms/InstCombine/X86/x86-fma.ll
new file mode 100644
index 00000000000..cddb1bf9c4e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-fma.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
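+; These scalar FMA patterns only use element 0 of each operand, so values
+; inserted into the upper elements should be optimized away.
+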
+define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: @test_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP5]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
+ %7 = extractelement <4 x float> %a, i64 0
+ %8 = extractelement <4 x float> %3, i64 0
+ %9 = extractelement <4 x float> %6, i64 0
+ %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
+ %11 = insertelement <4 x float> %a, float %10, i64 0
+ ret <4 x float> %11
+}
+
+define float @test_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: @test_vfmadd_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[TMP4]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = insertelement <4 x float> %3, float %7, i64 0
+ %9 = extractelement <4 x float> %8, i32 0
+ ret float %9
+}
+
+define float @test_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: @test_vfmadd_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = extractelement <4 x float> %3, i64 0
+ %5 = extractelement <4 x float> %b, i64 0
+ %6 = extractelement <4 x float> %c, i64 0
+ %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
+ %8 = insertelement <4 x float> %3, float %7, i64 0
+ %9 = extractelement <4 x float> %8, i32 1
+ ret float %9
+}
+
+define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: @test_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A]], double [[TMP4]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP5]]
+;
+ %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
+ %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
+ %3 = extractelement <2 x double> %a, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = insertelement <2 x double> %a, double %6, i64 0
+ ret <2 x double> %7
+}
+
+define double @test_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: @test_vfmadd_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = insertelement <2 x double> %1, double %5, i64 0
+ %7 = extractelement <2 x double> %6, i32 0
+ ret double %7
+}
+
+define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: @test_vfmadd_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = extractelement <2 x double> %1, i64 0
+ %3 = extractelement <2 x double> %b, i64 0
+ %4 = extractelement <2 x double> %c, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = insertelement <2 x double> %1, double %5, i64 0
+ %7 = extractelement <2 x double> %6, i32 1
+ ret double %7
+}
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-insertps.ll b/llvm/test/Transforms/InstCombine/X86/x86-insertps.ll
new file mode 100644
index 00000000000..54f00644f10
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-insertps.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
+
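+; The insertps immediate encodes, from high to low: bits 7:6 select the source
+; element of the second operand, bits 5:4 select the destination lane in the
+; first operand, and bits 3:0 are a zero mask that clears the corresponding
+; result lanes.
+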
+; If all zero mask bits are set, return a zero regardless of the other control bits.
+
+define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x0f(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xff(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
+ ret <4 x float> %res
+}
+
+; If some zero mask bits are set that do not override the insertion, we do not change anything.
+
+define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x0c(
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], i8 12)
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
+ ret <4 x float> %res
+}
+
+; ...unless both input vectors are the same operand.
+
+define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
+; CHECK-LABEL: @insertps_0x15_single_input(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
+ ret <4 x float> %res
+}
+
+; The zero mask overrides the insertion lane.
+
+define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
+; CHECK-LABEL: @insertps_0x1a_single_input(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
+ ret <4 x float> %res
+}
+
+; The zero mask overrides the insertion lane, so the second input vector is not used.
+
+define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xc1(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[V1:%.*]], float 0.000000e+00, i32 0
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
+ ret <4 x float> %res
+}
+
+; If no zero mask bits are set, convert to a shuffle.
+
+define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x00(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x10(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x20(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0x30(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xc0(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xd0(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 7, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xe0(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 7, i32 3>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
+ ret <4 x float> %res
+}
+
+define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @insertps_0xf0(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
+ ret <4 x float> %res
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
new file mode 100644
index 00000000000..be190007327
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
@@ -0,0 +1,328 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;; MASKED LOADS
+
+; If the mask isn't constant, do nothing.
+
+define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
+; CHECK-LABEL: @mload(
+; CHECK-NEXT: [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* [[F:%.*]], <4 x i32> [[MASK:%.*]])
+; CHECK-NEXT: ret <4 x float> [[LD]]
+;
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
+ ret <4 x float> %ld
+
+}
+
+; Zero mask returns a zero vector.
+
+define <4 x float> @mload_zeros(i8* %f) {
+; CHECK-LABEL: @mload_zeros(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
+ ret <4 x float> %ld
+
+}
+
+; Only the sign bit matters.
+
+define <4 x float> @mload_fake_ones(i8* %f) {
+; CHECK-LABEL: @mload_fake_ones(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>)
+ ret <4 x float> %ld
+
+}
+
+; All mask bits are set, so this is just a vector load.
+
+define <4 x float> @mload_real_ones(i8* %f) {
+; CHECK-LABEL: @mload_real_ones(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <4 x float>, <4 x float>* [[CASTVEC]], align 1
+; CHECK-NEXT: ret <4 x float> [[UNMASKEDLOAD]]
+;
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 2147483648>)
+ ret <4 x float> %ld
+
+}
+
+; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
+
+define <4 x float> @mload_one_one(i8* %f) {
+; CHECK-LABEL: @mload_one_one(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
+ ret <4 x float> %ld
+
+}
+
+; Try doubles.
+
+define <2 x double> @mload_one_one_double(i8* %f) {
+; CHECK-LABEL: @mload_one_one_double(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x double>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> <double undef, double 0.000000e+00>)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> <i64 -1, i64 0>)
+ ret <2 x double> %ld
+
+}
+
+; Try 256-bit FP ops.
+
+define <8 x float> @mload_v8f32(i8* %f) {
+; CHECK-LABEL: @mload_v8f32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
+ ret <8 x float> %ld
+
+}
+
+define <4 x double> @mload_v4f64(i8* %f) {
+; CHECK-LABEL: @mload_v4f64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x double>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> <double undef, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>)
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
+ ret <4 x double> %ld
+
+}
+
+; Try the AVX2 variants.
+
+define <4 x i32> @mload_v4i32(i8* %f) {
+; CHECK-LABEL: @mload_v4i32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 0, i32 0, i32 0, i32 undef>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
+ ret <4 x i32> %ld
+
+}
+
+define <2 x i64> @mload_v2i64(i8* %f) {
+; CHECK-LABEL: @mload_v2i64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x i64>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> <i64 undef, i64 0>)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> <i64 -1, i64 0>)
+ ret <2 x i64> %ld
+
+}
+
+define <8 x i32> @mload_v8i32(i8* %f) {
+; CHECK-LABEL: @mload_v8i32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0>)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
+ ret <8 x i32> %ld
+
+}
+
+define <4 x i64> @mload_v4i64(i8* %f) {
+; CHECK-LABEL: @mload_v4i64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>*
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> <i64 undef, i64 0, i64 0, i64 0>)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
+ ret <4 x i64> %ld
+
+}
+
+
+;; MASKED STORES
+
+; If the mask isn't constant, do nothing.
+
+define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) {
+; CHECK-LABEL: @mstore(
+; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps(i8* [[F:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[V:%.*]])
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
+ ret void
+
+}
+
+; Zero mask is a nop.
+
+define void @mstore_zeros(i8* %f, <4 x float> %v) {
+; CHECK-LABEL: @mstore_zeros(
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v)
+ ret void
+
+}
+
+; Only the sign bit matters.
+
+define void @mstore_fake_ones(i8* %f, <4 x float> %v) {
+; CHECK-LABEL: @mstore_fake_ones(
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>, <4 x float> %v)
+ ret void
+
+}
+
+; All mask bits are set, so this is just a vector store.
+
+define void @mstore_real_ones(i8* %f, <4 x float> %v) {
+; CHECK-LABEL: @mstore_real_ones(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], align 1
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -2147483648>, <4 x float> %v)
+ ret void
+
+}
+
+; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
+
+define void @mstore_one_one(i8* %f, <4 x float> %v) {
+; CHECK-LABEL: @mstore_one_one(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, <4 x float> %v)
+ ret void
+
+}
+
+; Try doubles.
+
+define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
+; CHECK-LABEL: @mstore_one_one_double(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x double>*
+; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[V:%.*]], <2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x double> %v)
+ ret void
+
+}
+
+; Try 256-bit FP ops.
+
+define void @mstore_v8f32(i8* %f, <8 x float> %v) {
+; CHECK-LABEL: @mstore_v8f32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>*
+; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[V:%.*]], <8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x float> %v)
+ ret void
+
+}
+
+define void @mstore_v4f64(i8* %f, <4 x double> %v) {
+; CHECK-LABEL: @mstore_v4f64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x double>*
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[V:%.*]], <4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x double> %v)
+ ret void
+
+}
+
+; Try the AVX2 variants.
+
+define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
+; CHECK-LABEL: @mstore_v4i32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i32>*
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[V:%.*]], <4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> <i32 0, i32 1, i32 -1, i32 -2>, <4 x i32> %v)
+ ret void
+
+}
+
+define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
+; CHECK-LABEL: @mstore_v2i64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x i64>*
+; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> [[V:%.*]], <2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx2.maskstore.q(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x i64> %v)
+ ret void
+
+}
+
+define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
+; CHECK-LABEL: @mstore_v8i32(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x i32>*
+; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[V:%.*]], <8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x i32> %v)
+ ret void
+
+}
+
+define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
+; CHECK-LABEL: @mstore_v4i64(
+; CHECK-NEXT: [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>*
+; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> [[V:%.*]], <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x i64> %v)
+ ret void
+
+}
+
+; The original SSE2 masked store variant.
+
+define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
+; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
+ ret void
+
+}
+
+
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>)
+
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>)
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>)
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>)
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>)
+
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>)
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>)
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>)
+
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>)
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
+
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
+
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll
new file mode 100644
index 00000000000..7be8f08dc63
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;
+; DemandedBits - MOVMSK zeros the upper bits of the result.
+;
+
+define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx [[A0:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+ %2 = and i32 %1, 15
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
+; CHECK-NEXT: [[TMP4:%.*]] = zext i2 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+ %2 = and i32 %1, 3
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <16 x i8> [[A0:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i1> [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+ %2 = and i32 %1, 65535
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
+ %2 = and i32 %1, 15
+ ret i32 %2
+}
+
+; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
+
+;
+; DemandedBits - If we don't use the lower bits then we just return zero.
+;
+
+define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, -256
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+ %2 = and i32 %1, -16
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+ %2 = and i32 %1, -4
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+ %2 = and i32 %1, -65536
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
+ %2 = and i32 %1, -256
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
+ %2 = and i32 %1, -16
+ ret i32 %2
+}
+
+; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
+
+;
+; Constant Folding (UNDEF -> ZERO)
+;
+
+define i32 @undef_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
+ ret i32 %1
+}
+
+;
+; Constant Folding (ZERO -> ZERO)
+;
+
+define i32 @zero_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = bitcast <1 x i64> zeroinitializer to x86_mmx
+ %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
+ ret i32 %2
+}
+
+define i32 @zero_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
+ ret i32 %1
+}
+
+;
+; Constant Folding
+;
+
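+; movmsk collects the sign bit of each element into the low bits of the result;
+; for example <1.0, -1.0, 100.0, -200.0> has negative elements 1 and 3, giving
+; 0b1010 = 10.
+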
+define i32 @fold_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
+ %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
+ ret i32 %2
+}
+
+define i32 @fold_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 10
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 2
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 5654
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 170
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 10
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 370546176
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
+ ret i32 %1
+}
+
+define i32 @sext_sse_movmsk_ps(<4 x i1> %x) {
+; CHECK-LABEL: @sext_sse_movmsk_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %sext = sext <4 x i1> %x to <4 x i32>
+ %bc = bitcast <4 x i32> %sext to <4 x float>
+ %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
+ ret i32 %r
+}
+
+define i32 @sext_sse2_movmsk_pd(<2 x i1> %x) {
+; CHECK-LABEL: @sext_sse2_movmsk_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[X:%.*]] to i2
+; CHECK-NEXT: [[TMP2:%.*]] = zext i2 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %sext = sext <2 x i1> %x to <2 x i64>
+ %bc = bitcast <2 x i64> %sext to <2 x double>
+ %r = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %bc)
+ ret i32 %r
+}
+
+define i32 @sext_sse2_pmovmskb_128(<16 x i1> %x) {
+; CHECK-LABEL: @sext_sse2_pmovmskb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i1> [[X:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %sext = sext <16 x i1> %x to <16 x i8>
+ %r = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %sext)
+ ret i32 %r
+}
+
+define i32 @sext_avx_movmsk_ps_256(<8 x i1> %x) {
+; CHECK-LABEL: @sext_avx_movmsk_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[X:%.*]] to i8
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %sext = sext <8 x i1> %x to <8 x i32>
+ %bc = bitcast <8 x i32> %sext to <8 x float>
+ %r = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %bc)
+ ret i32 %r
+}
+
+define i32 @sext_avx_movmsk_pd_256(<4 x i1> %x) {
+; CHECK-LABEL: @sext_avx_movmsk_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
+; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %sext = sext <4 x i1> %x to <4 x i64>
+ %bc = bitcast <4 x i64> %sext to <4 x double>
+ %r = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %bc)
+ ret i32 %r
+}
+
+define i32 @sext_avx2_pmovmskb(<32 x i1> %x) {
+; CHECK-LABEL: @sext_avx2_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i1> [[X:%.*]] to i32
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %sext = sext <32 x i1> %x to <32 x i8>
+ %r = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %sext)
+ ret i32 %r
+}
+
+; Bitcast from sign-extended scalar.
+
+define i32 @sext_sse_movmsk_ps_scalar_source(i1 %x) {
+; CHECK-LABEL: @sext_sse_movmsk_ps_scalar_source(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[X:%.*]] to i128
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[SEXT]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %sext = sext i1 %x to i128
+ %bc = bitcast i128 %sext to <4 x float>
+ %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
+ ret i32 %r
+}
+
+; Bitcast from vector type with more elements.
+
+define i32 @sext_sse_movmsk_ps_too_many_elts(<8 x i1> %x) {
+; CHECK-LABEL: @sext_sse_movmsk_ps_too_many_elts(
+; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[X:%.*]] to <8 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SEXT]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %sext = sext <8 x i1> %x to <8 x i16>
+ %bc = bitcast <8 x i16> %sext to <4 x float>
+ %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
+ ret i32 %r
+}
+
+; Handle this by doing a bitcasted sign-bit test after the sext.
+
+define i32 @sext_sse_movmsk_ps_must_replicate_bits(<2 x i1> %x) {
+; CHECK-LABEL: @sext_sse_movmsk_ps_must_replicate_bits(
+; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
+; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %sext = sext <2 x i1> %x to <2 x i64>
+ %bc = bitcast <2 x i64> %sext to <4 x float>
+ %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
+ ret i32 %r
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
+
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
+declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
+declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll
new file mode 100644
index 00000000000..be1ba8ad14e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll
@@ -0,0 +1,281 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; UNDEF Elts
+;
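+; A multiply with an undef operand folds to zero rather than to undef (zero is
+; always a valid choice for the undef input), so every call below simplifies to
+; a zero vector.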
+
+define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_128(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_256(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuludq_512(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
+ ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_128(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_256(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_pmuldq_512(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
+ ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_128(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_256(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuludq_512(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_128(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_256(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @undef_zero_pmuldq_512(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
+ ret <8 x i64> %1
+}
+
+;
+; Constant Folding
+;
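+; For example, in @fold_pmuludq_128 below, result element 0 is
+; zext(-1) * zext(2147483647) = 4294967295 * 2147483647 = 9223372030412324865,
+; and result element 1 is zext(-1) * zext(1) = 4294967295.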
+
+define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_128(
+; CHECK-NEXT: ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
+;
+ %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_256(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuludq_512(
+; CHECK-NEXT: ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
+ ret <8 x i64> %1
+}
+
+define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_128(
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 2>
+;
+ %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_256(
+; CHECK-NEXT: ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
+;
+ %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
+ ret <4 x i64> %1
+}
+
+define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @fold_pmuldq_512(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
+ ret <8 x i64> %1
+}
+
+;
+; PMULUDQ/PMULDQ - only the even-numbered elements (0, 2, 4, ...) of the vXi32 inputs are required.
+;
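+; pmuludq multiplies the zero-extended even i32 elements of its operands into
+; i64 results, and pmuldq does the same with sign-extension, so instcombine can
+; replace the intrinsics with a plain i64 multiply after masking (and) or
+; sign-extending (shl/ashr) the low 32 bits of each 64-bit lane, as the checks
+; below show.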
+
+define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], <i64 4294967295, i64 undef>
+; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967295, i64 undef>
+; CHECK-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> [[TMP8]]
+;
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2)
+ %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret <4 x i64> [[TMP7]]
+;
+ %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2)
+ ret <4 x i64> %3
+}
+
+define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuludq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: ret <8 x i64> [[TMP7]]
+;
+ %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+ %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+ %3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2)
+ ret <8 x i64> %3
+}
+
+define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], <i64 32, i64 32>
+; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: ret <2 x i64> [[TMP9]]
+;
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i64> [[TMP10]]
+;
+ %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2)
+ %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+ ret <4 x i64> %4
+}
+
+define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
+; CHECK-NEXT: ret <8 x i64> [[TMP10]]
+;
+ %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+ %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+ %3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2)
+ %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
+ ret <8 x i64> %4
+}
+
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
+declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
new file mode 100644
index 00000000000..f3c41a8aa47
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; UNDEF Elts
+;
+
+define <8 x i16> @undef_packssdw_128() {
+; CHECK-LABEL: @undef_packssdw_128(
+; CHECK-NEXT: ret <8 x i16> undef
+;
+ %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @undef_packusdw_128() {
+; CHECK-LABEL: @undef_packusdw_128(
+; CHECK-NEXT: ret <8 x i16> undef
+;
+ %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @undef_packsswb_128() {
+; CHECK-LABEL: @undef_packsswb_128(
+; CHECK-NEXT: ret <16 x i8> undef
+;
+ %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @undef_packuswb_128() {
+; CHECK-LABEL: @undef_packuswb_128(
+; CHECK-NEXT: ret <16 x i8> undef
+;
+ %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
+ ret <16 x i8> %1
+}
+
+define <16 x i16> @undef_packssdw_256() {
+; CHECK-LABEL: @undef_packssdw_256(
+; CHECK-NEXT: ret <16 x i16> undef
+;
+ %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @undef_packusdw_256() {
+; CHECK-LABEL: @undef_packusdw_256(
+; CHECK-NEXT: ret <16 x i16> undef
+;
+ %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
+ ret <16 x i16> %1
+}
+
+define <32 x i8> @undef_packsswb_256() {
+; CHECK-LABEL: @undef_packsswb_256(
+; CHECK-NEXT: ret <32 x i8> undef
+;
+ %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @undef_packuswb_256() {
+; CHECK-LABEL: @undef_packuswb_256(
+; CHECK-NEXT: ret <32 x i8> undef
+;
+ %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
+ ret <32 x i8> %1
+}
+
+define <32 x i16> @undef_packssdw_512() {
+; CHECK-LABEL: @undef_packssdw_512(
+; CHECK-NEXT: ret <32 x i16> undef
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @undef_packusdw_512() {
+; CHECK-LABEL: @undef_packusdw_512(
+; CHECK-NEXT: ret <32 x i16> undef
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
+ ret <32 x i16> %1
+}
+
+define <64 x i8> @undef_packsswb_512() {
+; CHECK-LABEL: @undef_packsswb_512(
+; CHECK-NEXT: ret <64 x i8> undef
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @undef_packuswb_512() {
+; CHECK-LABEL: @undef_packuswb_512(
+; CHECK-NEXT: ret <64 x i8> undef
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
+ ret <64 x i8> %1
+}
+
+;
+; Constant Folding
+;
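+; packssdw/packsswb saturate each signed input element to the signed i16/i8
+; range, and packusdw/packuswb saturate to the unsigned i16/i8 range; the
+; folded constants below follow directly from that.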
+
+define <8 x i16> @fold_packssdw_128() {
+; CHECK-LABEL: @fold_packssdw_128(
+; CHECK-NEXT: ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
+;
+ %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @fold_packusdw_128() {
+; CHECK-LABEL: @fold_packusdw_128(
+; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
+;
+ %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @fold_packsswb_128() {
+; CHECK-LABEL: @fold_packsswb_128(
+; CHECK-NEXT: ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
+;
+ %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @fold_packuswb_128() {
+; CHECK-LABEL: @fold_packuswb_128(
+; CHECK-NEXT: ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
+;
+ %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
+ ret <16 x i8> %1
+}
+
+define <16 x i16> @fold_packssdw_256() {
+; CHECK-LABEL: @fold_packssdw_256(
+; CHECK-NEXT: ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
+;
+ %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @fold_packusdw_256() {
+; CHECK-LABEL: @fold_packusdw_256(
+; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
+;
+ %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
+ ret <16 x i16> %1
+}
+
+define <32 x i8> @fold_packsswb_256() {
+; CHECK-LABEL: @fold_packsswb_256(
+; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+;
+ %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @fold_packuswb_256() {
+; CHECK-LABEL: @fold_packuswb_256(
+; CHECK-NEXT: ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
+;
+ %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
+ ret <32 x i8> %1
+}
+
+define <32 x i16> @fold_packssdw_512() {
+; CHECK-LABEL: @fold_packssdw_512(
+; CHECK-NEXT: ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @fold_packusdw_512() {
+; CHECK-LABEL: @fold_packusdw_512(
+; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
+ ret <32 x i16> %1
+}
+
+define <64 x i8> @fold_packsswb_512() {
+; CHECK-LABEL: @fold_packsswb_512(
+; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @fold_packuswb_512() {
+; CHECK-LABEL: @fold_packuswb_512(
+; CHECK-NEXT: ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
+ ret <64 x i8> %1
+}
+
+;
+; Demanded Elts
+;
+
+define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_128(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
+ %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+ %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
+ %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i16> %4
+}
+
+define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_128(
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> %a0, i32 0, i32 0
+ %2 = insertelement <4 x i32> %a1, i32 0, i32 3
+ %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
+ %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
+ ret <8 x i16> %4
+}
+
+define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_128(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %1 = insertelement <8 x i16> %a0, i16 0, i32 0
+ %2 = insertelement <8 x i16> %a1, i16 0, i32 0
+ %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
+ %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i8> %4
+}
+
+define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_128(
+; CHECK-NEXT: ret <16 x i8> undef
+;
+ %1 = insertelement <8 x i16> undef, i16 0, i32 0
+ %2 = insertelement <8 x i16> undef, i16 0, i32 0
+ %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
+ %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ ret <16 x i8> %4
+}
+
+define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_256(
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
+ %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
+ %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
+ ret <16 x i16> %4
+}
+
+define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
+ %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %4
+}
+
+define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_256(
+; CHECK-NEXT: ret <32 x i8> zeroinitializer
+;
+ %1 = insertelement <16 x i16> %a0, i16 0, i32 0
+ %2 = insertelement <16 x i16> %a1, i16 0, i32 8
+ %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
+ %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+ ret <32 x i8> %4
+}
+
+define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_256(
+; CHECK-NEXT: ret <32 x i8> undef
+;
+ %1 = insertelement <16 x i16> undef, i16 0, i32 1
+ %2 = insertelement <16 x i16> undef, i16 0, i32 0
+ %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
+ %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %4
+}
+
+define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @elts_packssdw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
+ %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
+ %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
+ ret <32 x i16> %4
+}
+
+define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @elts_packusdw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
+ %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i16> %4
+}
+
+define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @elts_packsswb_512(
+; CHECK-NEXT: ret <64 x i8> zeroinitializer
+;
+ %1 = insertelement <32 x i16> %a0, i16 0, i32 0
+ %2 = insertelement <32 x i16> %a1, i16 0, i32 8
+ %3 = insertelement <32 x i16> %1, i16 0, i32 16
+ %4 = insertelement <32 x i16> %2, i16 0, i32 24
+ %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
+ %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+ ret <64 x i8> %6
+}
+
+define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @elts_packuswb_512(
+; CHECK-NEXT: ret <64 x i8> undef
+;
+ %1 = insertelement <32 x i16> undef, i16 0, i32 1
+ %2 = insertelement <32 x i16> undef, i16 0, i32 0
+ %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
+ %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
+ ret <64 x i8> %4
+}
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
+declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
+declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
new file mode 100644
index 00000000000..d3ffd178010
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
@@ -0,0 +1,514 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <16 x i8> @identity_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test(
+; CHECK-NEXT: ret <16 x i8> %InVec
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2(
+; CHECK-NEXT: ret <32 x i8> %InVec
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @identity_test_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx512(
+; CHECK-NEXT: ret <64 x i8> %InVec
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <64 x i8> %1
+}
+
+; Verify that instcombine is able to fold byte shuffles with zero masks.
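+; A pshufb mask byte with its most significant bit set (-128 here) zeroes the
+; corresponding result byte, so an all -128 mask produces an all-zero vector.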
+
+define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2(
+; CHECK-NEXT: ret <32 x i8> zeroinitializer
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @fold_to_zero_vector_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx512(
+; CHECK-NEXT: ret <64 x i8> zeroinitializer
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <64 x i8> %1
+}
+
+; Instcombine should be able to fold the following byte shuffle into a
+; shufflevector instruction with an all-zeroes shuffle mask.
+
+define <16 x i8> @splat_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @splat_test(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+; In the test case below, elements in the low 128-bit lane of the result
+; vector are equal to the lower byte of %InVec (shuffle index 0).
+; Elements in the high 128-bit lane of the result vector are equal to
+; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).
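+; In other words, the AVX2 and AVX-512 forms of pshufb shuffle each 128-bit
+; lane independently, which is why the expected masks below use indices 0 and
+; 16 (and 32/48 in the 512-bit form).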
+
+define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @splat_test_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @splat_test_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @splat_test_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> zeroinitializer)
+ ret <64 x i8> %1
+}
+
+; Each of the byte shuffles in the following tests is equivalent to a blend between
+; vector %InVec and a vector of all zeroes.
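+; A mask byte with its sign bit set (-128) zeroes the corresponding result
+; byte, so each of these constant masks becomes a shufflevector that selects
+; either an element of %InVec or the zero element of the constant second
+; operand.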
+
+define <16 x i8> @blend1(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend1(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend2(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend3(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend3(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend4(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend4(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend5(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend5(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend6(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend6(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend1_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend2_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend3_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend4_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend5_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend6_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @blend1_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend1_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 1, i32 64, i32 3, i32 64, i32 5, i32 64, i32 7, i32 64, i32 9, i32 64, i32 11, i32 64, i32 13, i32 64, i32 15, i32 80, i32 17, i32 80, i32 19, i32 80, i32 21, i32 80, i32 23, i32 80, i32 25, i32 80, i32 27, i32 80, i32 29, i32 80, i32 31, i32 96, i32 33, i32 96, i32 35, i32 96, i32 37, i32 96, i32 39, i32 96, i32 41, i32 96, i32 43, i32 96, i32 45, i32 96, i32 47, i32 112, i32 49, i32 112, i32 51, i32 112, i32 53, i32 112, i32 55, i32 112, i32 57, i32 112, i32 59, i32 112, i32 61, i32 112, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @blend2_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend2_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 2, i32 3, i32 64, i32 64, i32 6, i32 7, i32 64, i32 64, i32 10, i32 11, i32 64, i32 64, i32 14, i32 15, i32 80, i32 80, i32 18, i32 19, i32 80, i32 80, i32 22, i32 23, i32 80, i32 80, i32 26, i32 27, i32 80, i32 80, i32 30, i32 31, i32 96, i32 96, i32 34, i32 35, i32 96, i32 96, i32 38, i32 39, i32 96, i32 96, i32 42, i32 43, i32 96, i32 96, i32 46, i32 47, i32 112, i32 112, i32 50, i32 51, i32 112, i32 112, i32 54, i32 55, i32 112, i32 112, i32 58, i32 59, i32 112, i32 112, i32 62, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @blend3_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend3_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 20, i32 21, i32 22, i32 23, i32 80, i32 80, i32 80, i32 80, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96, i32 36, i32 37, i32 38, i32 39, i32 96, i32 96, i32 96, i32 96, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 52, i32 53, i32 54, i32 55, i32 112, i32 112, i32 112, i32 112, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @blend4_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend4_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @blend5_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend5_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 18, i32 19, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 34, i32 35, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 50, i32 51, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @blend6_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend6_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128,i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <64 x i8> %1
+}
+
+; movq idiom.
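+; The shuffle mask keeps bytes 0-7 of the source and zeroes every byte above
+; them, i.e. a zero-extending move of the low 64 bits.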
+define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @movq_idiom_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <64 x i8> %1
+}
+
+; Vector permutations using byte shuffles.
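+; Every mask byte has its high bit clear, so only the low four bits are used
+; to pick a byte within the same 128-bit lane; the calls fold to single-source
+; shufflevectors.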
+
+define <16 x i8> @permute1(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute1(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @permute2(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute1_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute2_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @permute1_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute1_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @permute2_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute2_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <64 x i8> %1
+}
+
+; Test that instcombine correctly folds a pshufb with values that
+; are not -128 and that are not encoded in four bits.
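+; Only bit 7 (zero the result byte) and the low four bits (byte index within
+; the 16-byte lane) of each mask element matter; bits 4-6 are ignored, so a
+; mask byte such as 48 still selects byte 0 and any other negative value still
+; zeroes the byte.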
+
+define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test2_2(
+; CHECK-NEXT: ret <16 x i8> %InVec
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2_2(
+; CHECK-NEXT: ret <32 x i8> %InVec
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @identity_test_avx512_2(<64 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx512_2(
+; CHECK-NEXT: ret <64 x i8> %InVec
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63, i8 96, i8 49, i8 66, i8 99, i8 52, i8 69, i8 102, i8 55, i8 72, i8 105, i8 58, i8 75, i8 108, i8 61, i8 78, i8 111, i8 64, i8 81, i8 114, i8 67, i8 84, i8 117, i8 70, i8 87, i8 120, i8 73, i8 90, i8 123, i8 76, i8 93, i8 126, i8 79>)
+ ret <64 x i8> %1
+}
+
+define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_2(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2_2(
+; CHECK-NEXT: ret <32 x i8> zeroinitializer
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @fold_to_zero_vector_avx512_2(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx512_2(
+; CHECK-NEXT: ret <64 x i8> zeroinitializer
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16, i8 -125, i8 -3, i8 -51, i8 -30, i8 -6, i8 -9, i8 -35, i8 -68, i8 -101, i8 -118, i8 -102, i8 -24, i8 -15, i8 -3, i8 -13, i8 -17, i8 -124, i8 -4, i8 -56, i8 -29, i8 -7, i8 -10, i8 -36, i8 -69, i8 -102, i8 -117, i8 -103, i8 -25, i8 -14, i8 -4, i8 -14, i8 -18>)
+ ret <64 x i8> %1
+}
+
+define <16 x i8> @permute3(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute3(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute3_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @permute3_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute3_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111, i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 108, i8 109, i8 110, i8 111, i8 124, i8 125, i8 126, i8 127>)
+ ret <64 x i8> %1
+}
+
+; Verify that instcombine is able to fold constant byte shuffles with undef mask elements.
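+; Undef mask bytes are simply propagated as undef shuffle indices and do not
+; block the fold.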
+
+define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 64, i32 undef, i32 64, i32 1, i32 64, i32 undef, i32 64, i32 2, i32 64, i32 undef, i32 64, i32 3, i32 64, i32 undef, i32 64, i32 16, i32 80, i32 undef, i32 80, i32 17, i32 80, i32 undef, i32 80, i32 18, i32 80, i32 undef, i32 80, i32 19, i32 80, i32 undef, i32 80, i32 32, i32 96, i32 undef, i32 96, i32 33, i32 96, i32 undef, i32 96, i32 34, i32 96, i32 undef, i32 96, i32 35, i32 96, i32 undef, i32 96, i32 48, i32 112, i32 undef, i32 112, i32 49, i32 112, i32 undef, i32 112, i32 50, i32 112, i32 undef, i32 112, i32 51, i32 112, i32 undef, i32 112>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+ ret <64 x i8> %1
+}
+
+define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts(
+; CHECK-NEXT: ret <16 x i8> undef
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts_avx2(
+; CHECK-NEXT: ret <32 x i8> undef
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
+ ret <32 x i8> %1
+}
+
+define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts_avx512(
+; CHECK-NEXT: ret <64 x i8> undef
+;
+ %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> undef)
+ ret <64 x i8> %1
+}
+
+; Demanded elts tests.
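+; Mask lanes that the trailing shufflevector never reads are not demanded, so
+; the insertelements feeding them are removed (and in the broadcast case the
+; whole %BaseMask collapses to a single inserted element).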
+
+define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
+; CHECK-LABEL: @demanded_elts_insertion(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
+ %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
+ %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %2)
+ %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
+ ret <16 x i8> %4
+}
+
+define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
+; CHECK-LABEL: @demanded_elts_insertion_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %BaseMask)
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
+ %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
+ %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %2)
+ %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 undef, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %4
+}
+
+define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
+; CHECK-LABEL: @demanded_elts_insertion_avx512(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
+ %2 = insertelement <64 x i8> %1, i8 %M30, i32 30
+ %3 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %2)
+ %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
+ ret <64 x i8> %4
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
+declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse.ll
new file mode 100644
index 00000000000..830782b3b20
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-sse.ll
@@ -0,0 +1,611 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
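+; The scalar (*.ss) intrinsics only produce an interesting value in lane 0 and
+; pass the upper lanes of the first operand through. When the result is only
+; extracted at index 0, instcombine drops the insertelements that fill the
+; unused lanes and, for add/sub/mul/div/sqrt, replaces the call with scalar IR;
+; extracts of a pass-through lane fold to the constant that was inserted there.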
+
+define float @test_rcp_ss_0(float %a) {
+; CHECK-LABEL: @test_rcp_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_rcp_ss_1(float %a) {
+; CHECK-LABEL: @test_rcp_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 1
+ ret float %6
+}
+
+define float @test_sqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float %a)
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_sqrt_ss_2(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 2
+ ret float %6
+}
+
+define float @test_rsqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_rsqrt_ss_3(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 3
+ ret float %6
+}
+
+define float @test_add_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_add_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
+define float @test_sub_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_sub_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 2
+ ret float %7
+}
+
+define float @test_mul_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_mul_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 3
+ ret float %7
+}
+
+define float @test_div_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_div_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
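+; min/max/cmp are kept as intrinsic calls, but they read only lane 0 of the
+; second operand, so the inserts into %b are removed even when the whole
+; vector result is used.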
+define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_min_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_min_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[TMP4]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
+ %10 = extractelement <4 x float> %9, i32 0
+ ret float %10
+}
+
+define float @test_min_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 2
+ ret float %7
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_max_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[TMP4]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
+ %10 = extractelement <4 x float> %9, i32 0
+ ret float %10
+}
+
+define float @test_max_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 3
+ ret float %7
+}
+
+define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_cmp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
+ ret <4 x float> %4
+}
+
+define float @test_cmp_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[R]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_cmp_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
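+; The comi/ucomi intrinsics compare only lane 0 of each operand and return an
+; i32, so every insertelement above lane 0 is dead and is removed.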
+define i32 @test_comieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comieq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comige_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comigt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comile_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comilt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comineq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomieq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomige_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomigt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomile_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomilt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomineq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
+
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
+
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
+
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse2.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse2.ll
new file mode 100644
index 00000000000..721097e016f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-sse2.ll
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define double @test_sqrt_sd_0(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double %a)
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 0
+ ret double %4
+}
+
+define double @test_sqrt_sd_1(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 1
+ ret double %4
+}
+
+define double @test_add_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_add_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define double @test_sub_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_sub_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define double @test_mul_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_mul_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define double @test_div_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_div_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_min_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_min_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_min_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_max_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_max_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_cmp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
+ ret <2 x double> %2
+}
+
+define double @test_cmp_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_cmp_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define i32 @test_comieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comieq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comige_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comigt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comile_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comilt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comineq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomieq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomige_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomigt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomile_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomilt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomineq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
+
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
+
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
new file mode 100644
index 00000000000..ddc3b7372ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 10)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
+ ret <2 x double> %3
+}
+
+define <2 x double> @test_round_sd_floor(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP3]]
+;
+ %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 1)
+ ret <2 x double> %1
+}
+
+define <2 x double> @test_round_sd_ceil(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
+; CHECK-NEXT: ret <2 x double> [[TMP3]]
+;
+ %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 2)
+ ret <2 x double> %1
+}
+
+define double @test_round_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_round_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
+ ret <4 x float> %7
+}
+
+define <4 x float> @test_round_ss_floor(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
+ %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 1)
+ ret <4 x float> %1
+}
+
+define <4 x float> @test_round_ss_ceil(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
+ %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 2)
+ ret <4 x float> %1
+}
+
+define float @test_round_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[R]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_round_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+ %r = extractelement <4 x float> %9, i32 2
+ ret float %r
+}
+
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
new file mode 100644
index 00000000000..e33a382b7e1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
@@ -0,0 +1,408 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; EXTRQ
+;
+
+define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg0(
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg1(
+; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_to_extqi(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 15)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrq_call_constexpr(
+; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
+;
+ %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
+ ret <2 x i64> %1
+}
+
+;
+; EXTRQI
+;
+
+define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 23)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_undef(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_zero(
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_call_constexpr() {
+; CHECK-LABEL: @test_extrqi_call_constexpr(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQ
+;
+
+define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_to_insertqi(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertq_call_constexpr(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQI
+;
+
+define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_04uu(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @test_insertqi_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertqi_call_constexpr(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+ ret <2 x i64> %1
+}
+
+; The result of this insert is the second arg: the top 64 bits of
+; the result are undefined, and we copy the bottom 64 bits from the
+; second arg.
+define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testInsert64Bits(
+; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testZeroLength(
+; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_1(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_2(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_3(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+ ret <2 x i64> %1
+}
+
+;
+; Vector Demanded Bits
+;
+
+define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_args01(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_args01(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
new file mode 100644
index 00000000000..5ad8e767d76
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i16 @test1(float %f) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00
+; CHECK-NEXT: [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
+; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
+; CHECK-NEXT: [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
+; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
+; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
+; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
+; CHECK-NEXT: ret i16 [[TMP69]]
+;
+ %tmp = insertelement <4 x float> undef, float %f, i32 0
+ %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+ %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+ %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+ %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+ %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
+ %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
+ %tmp69 = trunc i32 %tmp.upgrd.1 to i16
+ ret i16 %tmp69
+}
+
+define i64 @test3(float %f, double %d) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
+; CHECK-NEXT: [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
+; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
+; CHECK-NEXT: [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
+; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
+; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
+; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
+; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT: ret i64 [[TMP15]]
+;
+ %v00 = insertelement <4 x float> undef, float %f, i32 0
+ %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+ %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+ %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+ %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
+ %v10 = insertelement <4 x float> undef, float %f, i32 0
+ %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+ %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+ %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+ %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
+ %v20 = insertelement <4 x float> undef, float %f, i32 0
+ %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+ %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+ %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+ %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
+ %v30 = insertelement <4 x float> undef, float %f, i32 0
+ %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+ %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+ %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+ %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
+ %v40 = insertelement <2 x double> undef, double %d, i32 0
+ %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+ %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
+ %v50 = insertelement <2 x double> undef, double %d, i32 0
+ %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+ %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
+ %v60 = insertelement <2 x double> undef, double %d, i32 0
+ %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+ %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
+ %v70 = insertelement <2 x double> undef, double %d, i32 0
+ %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+ %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
+ %tmp8 = add i32 %tmp0, %tmp2
+ %tmp9 = add i32 %tmp4, %tmp6
+ %tmp10 = add i32 %tmp8, %tmp9
+ %tmp11 = sext i32 %tmp10 to i64
+ %tmp12 = add i64 %tmp1, %tmp3
+ %tmp13 = add i64 %tmp5, %tmp7
+ %tmp14 = add i64 %tmp12, %tmp13
+ %tmp15 = add i64 %tmp11, %tmp14
+ ret i64 %tmp15
+}
+
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
new file mode 100644
index 00000000000..306577fae82
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -0,0 +1,3436 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
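+; The tests below check that x86 vector shift intrinsics with constant counts
+; are folded to generic IR shifts: a count of 0 is a no-op, an in-range count
+; becomes a splat shift, and an out-of-range count clamps to bitwidth-1 for
+; arithmetic shifts or folds the result to zero for logical shifts.
+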
+;
+; ASHR - Immediate
+;
+
+define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
+ ret <8 x i64> %1
+}
+
+;
+; LSHR - Immediate
+;
+
+define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_64(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_64(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_64(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
+ ret <8 x i64> %1
+}
+
+;
+; SHL - Immediate
+;
+
+define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_64(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_64(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_64(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
+ ret <8 x i64> %1
+}
+
+;
+; ASHR - Constant Vector
+;
+
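+; Note: the non-immediate forms take the shift count from the low 64 bits of
+; the second vector operand, so the high elements (e.g. the 9999 values) are
+; ignored. A splat such as <i32 15, i32 15, i32 15, i32 15> is therefore a
+; 64-bit count far larger than the element width, which is why the "_splat"
+; tests clamp to bitwidth-1 (ashr) or fold to zero (lshr/shl).
+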
+define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <8 x i64> %1
+}
+
+;
+; LSHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15_splat(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15_splat(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15_splat(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15_splat(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_64(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_64(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_64(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <8 x i64> %1
+}
+
+;
+; SHL - Constant Vector
+;
+
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15_splat(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15_splat(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15_splat(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15_splat(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_15_splat(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_64(
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_15_splat(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_64(
+; CHECK-NEXT: ret <16 x i32> zeroinitializer
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_64(
+; CHECK-NEXT: ret <8 x i64> zeroinitializer
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <8 x i64> %1
+}
+
+;
+; ASHR - Constant Per-Element Vector
+;
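+; A rough summary of the fold exercised below, inferred from the CHECK lines
+; rather than taken from the pass itself: a psrav* call whose per-element shift
+; amounts are all constants is rewritten as a plain 'ashr', and out-of-range
+; amounts (negative or >= the element width) behave like a shift by width-1,
+; so they appear clamped to 31 (i32), 63 (i64) or 15 (i16).
+;
+;   e.g. (hypothetical, not one of the generated tests):
+;     %r = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 1, i32 2, i32 40, i32 -1>)
+;   -> %r = ashr <4 x i32> %v, <i32 1, i32 2, i32 31, i32 31>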
+
+define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 undef>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 undef, i64 8>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
+ %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+ %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+ %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
+ ret <32 x i16> %2
+}
+
+;
+; LSHR - Constant Per-Element Vector
+;
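+; As the tests below suggest, a psrlv* call with constant per-element amounts is
+; rewritten as a plain 'lshr' when every defined amount is in range; when every
+; lane is out of range the whole result folds to zero (undef lanes stay undef);
+; a mix of in-range and out-of-range lanes leaves the intrinsic call untouched.
+;
+;   e.g. (hypothetical, not one of the generated tests):
+;     %r = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+;   -> %r = lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>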
+
+define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
+; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
+; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
+; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <4 x i64> %1
+}
+
+; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input.
+
+define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_undef(
+; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+ %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
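+
+; In contrast, when the remaining defined lanes are non-zero (the 256-bit case
+; below), the undef lane can no longer make the whole amount zero, so the call
+; is folded to an 'lshr' that keeps the undef lane in the shift amount.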
+
+define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <16 x i32> @avx512_psrlv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
+; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
+; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
+; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+ %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
+; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+ %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
+; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
+ ret <32 x i16> %2
+}
+
+;
+; SHL - Constant Per-Element Vector
+;
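+; Same shape as the LSHR block above, but producing 'shl': in-range constant
+; amounts become a plain shift, all-out-of-range amounts fold the result to
+; zero, and mixed amounts leave the psllv* call in place.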
+
+define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_allbig(
+; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_allbig(
+; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_allbig(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_allbig(
+; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <4 x i64> %1
+}
+
+; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input.
+
+define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_undef(
+; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+ %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %v
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_allbig(
+; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_0(
+; CHECK-NEXT: ret <8 x i64> %v
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_allbig(
+; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_allbig(
+; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
+;
+ %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+ %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_allbig(
+; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+ %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %v
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_allbig(
+; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
+ ret <32 x i16> %2
+}
+
+;
+; Vector Demanded Bits
+;
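+; These cases check that only the low 64 bits of the count operand appear to be
+; demanded by the per-128-bit-lane shift intrinsics: a shufflevector that merely
+; duplicates the low lanes of the count is dropped and the original count (or
+; its bitcast) is passed straight to the intrinsic.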
+
+define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psra_w_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <8 x i16>
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psra_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_d_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
+ ret <4 x i32> %3
+}
+
+define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psra_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psra_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psra_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
+ ret <32 x i16> %2
+}
+
+define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psra_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psrl_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psrl_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psrl_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <16 x i16> [[TMP2]]
+;
+ %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
+ ret <16 x i16> %3
+}
+
+define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <8 x i32> [[TMP2]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psrl_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
+ ret <32 x i16> %2
+}
+
+define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <32 x i16> [[TMP2]]
+;
+ %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
+ ret <32 x i16> %3
+}
+
+define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psrl_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <16 x i32> [[TMP2]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
+ ret <16 x i32> %3
+}
+
+define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psrl_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psll_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psll_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psll_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psll_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psll_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psll_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psll_w_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
+ ret <32 x i16> %2
+}
+
+define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psll_d_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
+ ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psll_q_512_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
+ ret <8 x i64> %2
+}
+
+;
+; Constant Folding
+;
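+; Shifts by a count of zero (vector or immediate) should fold away entirely, and
+; chains of shifts on constant inputs should constant fold to a constant vector.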
+
+define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
+; CHECK-LABEL: @test_sse2_psra_w_0(
+; CHECK-NEXT: ret <8 x i16> %A
+;
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_sse2_psra_w_8() {
+; CHECK-LABEL: @test_sse2_psra_w_8(
+; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
+; CHECK-LABEL: @test_sse2_psra_d_0(
+; CHECK-NEXT: ret <4 x i32> %A
+;
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @sse2_psra_d_8() {
+; CHECK-LABEL: @sse2_psra_d_8(
+; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+
+define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_0(
+; CHECK-NEXT: ret <16 x i16> %A
+;
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_8(
+; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
+ ret <16 x i16> %4
+}
+
+define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
+; CHECK-LABEL: @test_avx2_psra_d_0(
+; CHECK-NEXT: ret <8 x i32> %A
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @test_avx2_psra_d_8() {
+; CHECK-LABEL: @test_avx2_psra_d_8(
+; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
+ ret <8 x i32> %4
+}
+
+define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
+; CHECK-LABEL: @test_avx512_psra_w_512_0(
+; CHECK-NEXT: ret <32 x i16> %A
+;
+ %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
+; CHECK-LABEL: @test_avx512_psra_w_512_8(
+; CHECK-NEXT: ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+ %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
+ %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
+ %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
+ ret <32 x i16> %4
+}
+
+define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
+; CHECK-LABEL: @test_avx512_psra_d_512_0(
+; CHECK-NEXT: ret <16 x i32> %A
+;
+ %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0)
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @test_avx512_psra_d_512_8() {
+; CHECK-LABEL: @test_avx512_psra_d_512_8(
+; CHECK-NEXT: ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+ %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
+ %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3)
+ %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2)
+ ret <16 x i32> %4
+}
+
+;
+; Old Tests
+;
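+; Legacy tests: long chains of shift-by-scalar and shift-by-immediate calls on
+; constant inputs that should constant fold end-to-end.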
+
+define <2 x i64> @test_sse2_1() {
+; CHECK-LABEL: @test_sse2_1(
+; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+;
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_1() {
+; CHECK-LABEL: @test_avx2_1(
+; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+;
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_0() {
+; CHECK-LABEL: @test_sse2_0(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_0() {
+; CHECK-LABEL: @test_avx2_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_psrl_1() {
+; CHECK-LABEL: @test_sse2_psrl_1(
+; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+;
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_psrl_1() {
+; CHECK-LABEL: @test_avx2_psrl_1(
+; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+;
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_psrl_0() {
+; CHECK-LABEL: @test_sse2_psrl_0(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_psrl_0() {
+; CHECK-LABEL: @test_avx2_psrl_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+}
+
+declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
+declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
+declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
+declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
+declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
+declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1
+
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
new file mode 100644
index 00000000000..f68eb36c4b5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that instcombine is able to fold identity shuffles.
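+; An identity selector (<0,1,2,3> per 128-bit lane for the ps forms; even values for
+; the pd forms, where only bit 1 of each selector element is used) leaves the source
+; vector unchanged.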
+
+define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps(
+; CHECK-NEXT: ret <4 x float> %v
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
+; CHECK-NEXT: ret <8 x float> %v
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %a
+}
+
+define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps_512(
+; CHECK-NEXT: ret <16 x float> %v
+;
+ %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>)
+ ret <16 x float> %a
+}
+
+define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd(
+; CHECK-NEXT: ret <2 x double> %v
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
+; CHECK-NEXT: ret <4 x double> %v
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
+ ret <4 x double> %a
+}
+
+define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd_512(
+; CHECK-NEXT: ret <8 x double> %v
+;
+ %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 0, i64 2, i64 0, i64 2, i64 0, i64 2, i64 0, i64 2>)
+ ret <8 x double> %a
+}
+
+; Instcombine should be able to fold the following variable shuffles (all-zero selectors)
+; into a generic shufflevector that broadcasts element 0 within each 128-bit lane.
+
+define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
+ ret <4 x float> %a
+}
+
+define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
+ ret <8 x float> %a
+}
+
+define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer)
+ ret <16 x float> %a
+}
+
+define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
+ ret <2 x double> %a
+}
+
+define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
+ ret <4 x double> %a
+}
+
+define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer)
+ ret <8 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles.
+
+define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %a
+}
+
+define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
+ ret <4 x double> %a
+}
+
+define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 3, i64 1, i64 2, i64 0, i64 7, i64 5, i64 6, i64 4>)
+ ret <8 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
+
+define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 10, i32 9, i32 undef, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0, i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
+ ret <16 x float> %a
+}
+
+define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
+ ret <4 x double> %a
+}
+
+define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 undef, i32 0, i32 3, i32 undef, i32 undef, i32 4, i32 7, i32 undef>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 undef, i64 1, i64 2, i64 undef, i64 undef, i64 1, i64 2, i64 undef>)
+ ret <8 x double> %a
+}
+
+; Simplify demanded elts
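+; Selector lanes whose corresponding results are discarded by a later shufflevector
+; are undemanded, so variable inserts into those lanes should not block the fold.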
+
+define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps(
+; CHECK-NEXT: ret <4 x float> %a0
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
+ %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
+ %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x float> %3
+}
+
+define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
+ %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7>
+ ret <8 x float> %3
+}
+
+define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
+ %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
+ %3 = shufflevector <16 x float> %2, <16 x float> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x float> %3
+}
+
+define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd(
+; CHECK-NEXT: ret <2 x double> %a0
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
+ %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
+ %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
+ ret <2 x double> %3
+}
+
+define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 undef>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
+ %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x double> %3
+}
+
+define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i64> undef, i64 %a2, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
+ %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
+ %3 = shufflevector <8 x double> %2, <8 x double> undef, <8 x i32> zeroinitializer
+ ret <8 x double> %3
+}
+
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
+
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll
new file mode 100644
index 00000000000..03a3f921abb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll
@@ -0,0 +1,305 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
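+
+; The scalar vfrcz intrinsics only read element 0 of their input and zero the upper
+; result elements, so inserts into the upper input lanes are dead and extracts from
+; the upper result lanes fold to 0.0.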
+
+define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
+; CHECK-LABEL: @test_vfrcz_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_vfrcz_sd_0(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 0
+ ret double %4
+}
+
+define double @test_vfrcz_sd_1(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_1(
+; CHECK-NEXT: ret double 0.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 1
+ ret double %4
+}
+
+define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
+; CHECK-LABEL: @test_vfrcz_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_vfrcz_ss_0(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_vfrcz_ss_3(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_3(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 3
+ ret float %6
+}
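+
+; The vpcom* comparison intrinsics should fold to a generic icmp + sext; the
+; always-true and always-false variants fold to all-ones and zero constants.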
+
+define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_slt_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ult_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_sle_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ule_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sgt_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_ugt_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sge_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_uge_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_seq_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_ueq_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_sne_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_une_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_strue_v16i8(
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_utrue_v16i8(
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_sfalse_v16i8(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_ufalse_v16i8(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll
new file mode 100644
index 00000000000..8bbc833d5d4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @abs(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+
+; Test that the abs library call simplifier works correctly.
+; abs(x) -> x <s 0 ? -x : x.
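+; For example, abs(-7): (-7 <s 0) is true, so the select yields 0 - (-7) = 7;
+; a non-negative argument is returned unchanged.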
+
+define i32 @test_abs(i32 %x) {
+; CHECK-LABEL: @test_abs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ret = call i32 @abs(i32 %x)
+ ret i32 %ret
+}
+
+define i64 @test_labs(i64 %x) {
+; CHECK-LABEL: @test_labs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i64 0, [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ret = call i64 @labs(i64 %x)
+ ret i64 %ret
+}
+
+define i64 @test_llabs(i64 %x) {
+; CHECK-LABEL: @test_llabs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i64 0, [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ret = call i64 @llabs(i64 %x)
+ ret i64 %ret
+}
+
+; We have a canonical form of abs to make CSE easier.
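+; As the CHECK lines below show, the canonical form compares against zero and puts
+; the negation in the true arm of the select:
+;   %cmp = icmp slt i8 %x, 0
+;   %neg = sub i8 0, %x
+;   %abs = select i1 %cmp, i8 %neg, i8 %x
+; so equivalent spellings (sgt -1, slt 1, swapped select arms) all CSE to this sequence.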
+
+define i8 @abs_canonical_1(i8 %x) {
+; CHECK-LABEL: @abs_canonical_1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp sgt i8 %x, 0
+ %neg = sub i8 0, %x
+ %abs = select i1 %cmp, i8 %x, i8 %neg
+ ret i8 %abs
+}
+
+; Vectors should work too.
+
+define <2 x i8> @abs_canonical_2(<2 x i8> %x) {
+; CHECK-LABEL: @abs_canonical_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+ %neg = sub <2 x i8> zeroinitializer, %x
+ %abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
+ ret <2 x i8> %abs
+}
+
+; Even if a constant has undef elements.
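+; The undef lane in the compare constant may be assumed to take the value the abs
+; pattern needs (-1 here), so the fold still applies and the output is unchanged.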
+
+define <2 x i8> @abs_canonical_2_vec_undef_elts(<2 x i8> %x) {
+; CHECK-LABEL: @abs_canonical_2_vec_undef_elts(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %cmp = icmp sgt <2 x i8> %x, <i8 undef, i8 -1>
+ %neg = sub <2 x i8> zeroinitializer, %x
+ %abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
+ ret <2 x i8> %abs
+}
+
+; NSW should not change.
+
+define i8 @abs_canonical_3(i8 %x) {
+; CHECK-LABEL: @abs_canonical_3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp slt i8 %x, 0
+ %neg = sub nsw i8 0, %x
+ %abs = select i1 %cmp, i8 %neg, i8 %x
+ ret i8 %abs
+}
+
+define i8 @abs_canonical_4(i8 %x) {
+; CHECK-LABEL: @abs_canonical_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp slt i8 %x, 1
+ %neg = sub i8 0, %x
+ %abs = select i1 %cmp, i8 %neg, i8 %x
+ ret i8 %abs
+}
+
+define i32 @abs_canonical_5(i8 %x) {
+; CHECK-LABEL: @abs_canonical_5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[X]] to i32
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[CONV]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %cmp = icmp sgt i8 %x, 0
+ %conv = sext i8 %x to i32
+ %neg = sub i32 0, %conv
+ %abs = select i1 %cmp, i32 %conv, i32 %neg
+ ret i32 %abs
+}
+
+define i32 @abs_canonical_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_6(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp sgt i32 %tmp1, -1
+ %tmp2 = sub i32 %b, %a
+ %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+ ret i32 %abs
+}
+
+define <2 x i8> @abs_canonical_7(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @abs_canonical_7(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP2]], <2 x i8> [[TMP1]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %tmp1 = sub <2 x i8> %a, %b
+ %cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
+ %tmp2 = sub <2 x i8> %b, %a
+ %abs = select <2 x i1> %cmp, <2 x i8> %tmp1, <2 x i8> %tmp2
+ ret <2 x i8> %abs
+}
+
+define i32 @abs_canonical_8(i32 %a) {
+; CHECK-LABEL: @abs_canonical_8(
+; CHECK-NEXT: [[TMP:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp = sub i32 0, %a
+ %cmp = icmp slt i32 %tmp, 0
+ %abs = select i1 %cmp, i32 %a, i32 %tmp
+ ret i32 %abs
+}
+
+define i32 @abs_canonical_9(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_9(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[B]], [[A]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ABS]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp sgt i32 %tmp1, -1
+ %tmp2 = sub i32 %b, %a
+ %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+ %add = add i32 %abs, %tmp2 ; increase use count for %tmp2.
+ ret i32 %add
+}
+
+define i32 @abs_canonical_10(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_10(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[NEGTMP:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[NEGTMP]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp2 = sub i32 %b, %a
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp sgt i32 %tmp1, -1
+ %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+ ret i32 %abs
+}
+
+; We have a canonical form of nabs to make CSE easier.
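+; The canonical nabs form is the abs form with the select arms swapped:
+;   %cmp = icmp slt i8 %x, 0
+;   %neg = sub i8 0, %x
+;   %nabs = select i1 %cmp, i8 %x, i8 %neg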
+
+define i8 @nabs_canonical_1(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp sgt i8 %x, 0
+ %neg = sub i8 0, %x
+ %abs = select i1 %cmp, i8 %neg, i8 %x
+ ret i8 %abs
+}
+
+; Vectors should work too.
+
+define <2 x i8> @nabs_canonical_2(<2 x i8> %x) {
+; CHECK-LABEL: @nabs_canonical_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+ %neg = sub <2 x i8> zeroinitializer, %x
+ %abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
+ ret <2 x i8> %abs
+}
+
+; Even if a constant has undef elements.
+
+define <2 x i8> @nabs_canonical_2_vec_undef_elts(<2 x i8> %x) {
+; CHECK-LABEL: @nabs_canonical_2_vec_undef_elts(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 undef>
+ %neg = sub <2 x i8> zeroinitializer, %x
+ %abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
+ ret <2 x i8> %abs
+}
+
+; NSW should not change.
+
+define i8 @nabs_canonical_3(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp slt i8 %x, 0
+ %neg = sub nsw i8 0, %x
+ %abs = select i1 %cmp, i8 %x, i8 %neg
+ ret i8 %abs
+}
+
+define i8 @nabs_canonical_4(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %cmp = icmp slt i8 %x, 1
+ %neg = sub i8 0, %x
+ %abs = select i1 %cmp, i8 %x, i8 %neg
+ ret i8 %abs
+}
+
+define i32 @nabs_canonical_5(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[X]] to i32
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[CONV]], i32 [[NEG]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %cmp = icmp sgt i8 %x, 0
+ %conv = sext i8 %x to i32
+ %neg = sub i32 0, %conv
+ %abs = select i1 %cmp, i32 %neg, i32 %conv
+ ret i32 %abs
+}
+
+define i32 @nabs_canonical_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_6(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp sgt i32 %tmp1, -1
+ %tmp2 = sub i32 %b, %a
+ %abs = select i1 %cmp, i32 %tmp2, i32 %tmp1
+ ret i32 %abs
+}
+
+define <2 x i8> @nabs_canonical_7(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @nabs_canonical_7(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %tmp1 = sub <2 x i8> %a, %b
+ %cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
+ %tmp2 = sub <2 x i8> %b, %a
+ %abs = select <2 x i1> %cmp, <2 x i8> %tmp2, <2 x i8> %tmp1
+ ret <2 x i8> %abs
+}
+
+define i32 @nabs_canonical_8(i32 %a) {
+; CHECK-LABEL: @nabs_canonical_8(
+; CHECK-NEXT: [[TMP:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[TMP]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp = sub i32 0, %a
+ %cmp = icmp slt i32 %tmp, 0
+ %abs = select i1 %cmp, i32 %tmp, i32 %a
+ ret i32 %abs
+}
+
+define i32 @nabs_canonical_9(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_9(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[B]], [[A]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[ABS]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp sgt i32 %tmp1, -1
+ %tmp2 = sub i32 %b, %a
+ %abs = select i1 %cmp, i32 %tmp2, i32 %tmp1
+ %add = add i32 %tmp2, %abs ; increase use count for %tmp2
+ ret i32 %add
+}
+
+define i32 @nabs_canonical_10(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_10(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[NEGTMP:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[NEGTMP]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %tmp2 = sub i32 %b, %a
+ %tmp1 = sub i32 %a, %b
+ %cmp = icmp slt i32 %tmp1, 1
+ %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+ ret i32 %abs
+}
+
+; The following tests use a shift+add+xor to implement abs():
+; B = ashr i8 A, 7 -- smear the sign bit.
+; xor (add A, B), B -- add -1 and flip bits if negative
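+; e.g. for A = -7 (i8): B = -1, the add gives -8, and -8 xor -1 = 7 = abs(-7);
+; for non-negative A, B = 0 and both the add and the xor are no-ops.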
+
+define i8 @shifty_abs_commute0(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %signbit = ashr i8 %x, 7
+ %add = add i8 %signbit, %x
+ %abs = xor i8 %add, %signbit
+ ret i8 %abs
+}
+
+define i8 @shifty_abs_commute0_nsw(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %signbit = ashr i8 %x, 7
+ %add = add nsw i8 %signbit, %x
+ %abs = xor i8 %add, %signbit
+ ret i8 %abs
+}
+
+; The nuw flag creates a contradiction. If the shift produces all 1s, the only
+; way for the add to not wrap is for %x to be 0, but then the shift couldn't
+; have produced all 1s. We partially optimize this.
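+; e.g. %x = -7 gives %signbit = -1, and the unsigned add 0xFF + 0xF9 wraps, so every
+; negative input is poison; for the remaining inputs (x >= 0) the expression is just x,
+; which the folded select below also produces.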
+define i8 @shifty_abs_commute0_nuw(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %signbit = ashr i8 %x, 7
+ %add = add nuw i8 %signbit, %x
+ %abs = xor i8 %add, %signbit
+ ret i8 %abs
+}
+
+define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
+; CHECK-LABEL: @shifty_abs_commute1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[X]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %signbit = ashr <2 x i8> %x, <i8 7, i8 7>
+ %add = add <2 x i8> %signbit, %x
+ %abs = xor <2 x i8> %signbit, %add
+ ret <2 x i8> %abs
+}
+
+define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
+; CHECK-LABEL: @shifty_abs_commute2(
+; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
+; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
+; CHECK-NEXT: ret <2 x i8> [[ABS]]
+;
+ %y = mul <2 x i8> %x, <i8 3, i8 3> ; extra op to thwart complexity-based canonicalization
+ %signbit = ashr <2 x i8> %y, <i8 7, i8 7>
+ %add = add <2 x i8> %y, %signbit
+ %abs = xor <2 x i8> %signbit, %add
+ ret <2 x i8> %abs
+}
+
+define i8 @shifty_abs_commute3(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute3(
+; CHECK-NEXT: [[Y:%.*]] = mul i8 [[X:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %y = mul i8 %x, 3 ; extra op to thwart complexity-based canonicalization
+ %signbit = ashr i8 %y, 7
+ %add = add i8 %y, %signbit
+ %abs = xor i8 %add, %signbit
+ ret i8 %abs
+}
+
+; Negative test - don't transform if it would increase instruction count.
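+; Here %signbit has a second use, so the ashr cannot be deleted; rewriting the add+xor
+; into compare+negate+select would then add instructions instead of removing them.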
+
+declare void @extra_use(i8)
+
+define i8 @shifty_abs_too_many_uses(i8 %x) {
+; CHECK-LABEL: @shifty_abs_too_many_uses(
+; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i8 [[X:%.*]], 7
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SIGNBIT]], [[X]]
+; CHECK-NEXT: [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT: call void @extra_use(i8 [[SIGNBIT]])
+; CHECK-NEXT: ret i8 [[ABS]]
+;
+ %signbit = ashr i8 %x, 7
+ %add = add i8 %x, %signbit
+ %abs = xor i8 %add, %signbit
+ call void @extra_use(i8 %signbit)
+ ret i8 %abs
+}
+
+; There's another way to make abs() using shift, xor, and subtract.
+; PR36036 - https://bugs.llvm.org/show_bug.cgi?id=36036
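+; i.e. abs(x) = (x ^ (x ashr 7)) - (x ashr 7): for x = -7 the shift gives -1, the xor
+; flips the bits to 6, and subtracting -1 adds the final 1, yielding 7.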
+
+define i8 @shifty_sub(i8 %x) {
+; CHECK-LABEL: @shifty_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sh = ashr i8 %x, 7
+ %xor = xor i8 %x, %sh
+ %r = sub i8 %xor, %sh
+ ret i8 %r
+}
+
+define i8 @shifty_sub_nsw_commute(i8 %x) {
+; CHECK-LABEL: @shifty_sub_nsw_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sh = ashr i8 %x, 7
+ %xor = xor i8 %sh, %x
+ %r = sub nsw i8 %xor, %sh
+ ret i8 %r
+}
+
+define <4 x i32> @shifty_sub_nuw_vec_commute(<4 x i32> %x) {
+; CHECK-LABEL: @shifty_sub_nuw_vec_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+ %xor = xor <4 x i32> %sh, %x
+ %r = sub nuw <4 x i32> %xor, %sh
+ ret <4 x i32> %r
+}
+
+define i12 @shifty_sub_nsw_nuw(i12 %x) {
+; CHECK-LABEL: @shifty_sub_nsw_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i12 [[X:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i12 [[X]], i12 0
+; CHECK-NEXT: ret i12 [[R]]
+;
+ %sh = ashr i12 %x, 11
+ %xor = xor i12 %x, %sh
+ %r = sub nsw nuw i12 %xor, %sh
+ ret i12 %r
+}
+
+define i8 @negate_abs(i8 %x) {
+; CHECK-LABEL: @negate_abs(
+; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]]
+; CHECK-NEXT: ret i8 [[S]]
+;
+ %n = sub i8 0, %x
+ %c = icmp slt i8 %x, 0
+ %s = select i1 %c, i8 %n, i8 %x
+ %r = sub i8 0, %s
+ ret i8 %r
+}
+
+define <2 x i8> @negate_nabs(<2 x i8> %x) {
+; CHECK-LABEL: @negate_nabs(
+; CHECK-NEXT: [[N:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], zeroinitializer
+; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[N]], <2 x i8> [[X]]
+; CHECK-NEXT: ret <2 x i8> [[S]]
+;
+ %n = sub <2 x i8> zeroinitializer, %x
+ %c = icmp slt <2 x i8> %x, zeroinitializer
+ %s = select <2 x i1> %c, <2 x i8> %x, <2 x i8> %n
+ %r = sub <2 x i8> zeroinitializer, %s
+ ret <2 x i8> %r
+}
+
+define i1 @abs_must_be_positive(i32 %x) {
+; CHECK-LABEL: @abs_must_be_positive(
+; CHECK-NEXT: ret i1 true
+;
+ %negx = sub nsw i32 0, %x
+ %c = icmp sge i32 %x, 0
+ %sel = select i1 %c, i32 %x, i32 %negx
+ %c2 = icmp sge i32 %sel, 0
+ ret i1 %c2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/abs_abs.ll b/llvm/test/Transforms/InstCombine/abs_abs.ll
new file mode 100644
index 00000000000..207ceb5215a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/abs_abs.ll
@@ -0,0 +1,1346 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
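+; These tests wrap one abs/nabs pattern inside another, using the different combinations
+; of sgt -1 / sgt 0 / slt 0 / slt 1 compares and swapped select arms; the nested form
+; should fold to a single compare + negate + select.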
+define i32 @abs_abs_x01(i32 %x) {
+; CHECK-LABEL: @abs_abs_x01(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define <2 x i32> @abs_abs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x01_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+define i32 @abs_abs_x02(i32 %x) {
+; CHECK-LABEL: @abs_abs_x02(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x03(i32 %x) {
+; CHECK-LABEL: @abs_abs_x03(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x04(i32 %x) {
+; CHECK-LABEL: @abs_abs_x04(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define <2 x i32> @abs_abs_x04_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x04_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp slt <2 x i32> %x, <i32 1, i32 1>
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+define i32 @abs_abs_x05(i32 %x) {
+; CHECK-LABEL: @abs_abs_x05(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x06(i32 %x) {
+; CHECK-LABEL: @abs_abs_x06(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x07(i32 %x) {
+; CHECK-LABEL: @abs_abs_x07(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x08(i32 %x) {
+; CHECK-LABEL: @abs_abs_x08(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x09(i32 %x) {
+; CHECK-LABEL: @abs_abs_x09(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x10(i32 %x) {
+; CHECK-LABEL: @abs_abs_x10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x11(i32 %x) {
+; CHECK-LABEL: @abs_abs_x11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x12(i32 %x) {
+; CHECK-LABEL: @abs_abs_x12(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x13(i32 %x) {
+; CHECK-LABEL: @abs_abs_x13(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x14(i32 %x) {
+; CHECK-LABEL: @abs_abs_x14(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x15(i32 %x) {
+; CHECK-LABEL: @abs_abs_x15(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_abs_x16(i32 %x) {
+; CHECK-LABEL: @abs_abs_x16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+; abs(abs(-x)) -> abs(-x) -> abs(x)
+define i32 @abs_abs_x17(i32 %x) {
+; CHECK-LABEL: @abs_abs_x17(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %sub = sub nsw i32 0, %x
+ %cmp = icmp sgt i32 %sub, -1
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; abs(abs(x - y)) -> abs(x - y)
+define i32 @abs_abs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_abs_x18(
+; CHECK-NEXT: [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[NEGA]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %a = sub nsw i32 %x, %y
+ %b = sub nsw i32 %y, %x
+ %cmp = icmp sgt i32 %a, -1
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; abs(abs(-x)) -> abs(-x) -> abs(x)
+define <2 x i32> @abs_abs_x02_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x02_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+; abs(abs(x - y)) -> abs(x - y)
+define <2 x i32> @abs_abs_x03_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @abs_abs_x03_vec(
+; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT: [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NEGA]], <2 x i32> [[A]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %a = sub nsw <2 x i32> %x, %y
+ %b = sub nsw <2 x i32> %y, %x
+ %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+define i32 @nabs_nabs_x01(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x01(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x02(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x02(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x03(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x03(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x04(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x04(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x05(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x05(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x06(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x06(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x07(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x07(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x08(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x08(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x09(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x09(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x10(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x11(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x12(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x12(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x13(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x13(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x14(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x14(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x15(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x15(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x16(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; nabs(nabs(-x)) -> nabs(-x) -> nabs(x)
+define i32 @nabs_nabs_x17(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x17(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %sub = sub nsw i32 0, %x
+ %cmp = icmp sgt i32 %sub, -1
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+ ret i32 %cond18
+}
+
+; nabs(nabs(x - y)) -> nabs(x - y)
+define i32 @nabs_nabs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @nabs_nabs_x18(
+; CHECK-NEXT: [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[NEGA]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %a = sub nsw i32 %x, %y
+ %b = sub nsw i32 %y, %x
+ %cmp = icmp sgt i32 %a, -1
+ %cond = select i1 %cmp, i32 %b, i32 %a
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+ ret i32 %cond18
+}
+
+; nabs(nabs(-x)) -> nabs(-x) -> nabs(x)
+define <2 x i32> @nabs_nabs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @nabs_nabs_x01_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[X]], <2 x i32> [[SUB]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+ ret <2 x i32> %cond18
+}
+
+; nabs(nabs(x - y)) -> nabs(x - y)
+define <2 x i32> @nabs_nabs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nabs_nabs_x02_vec(
+; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT: [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[A]], <2 x i32> [[NEGA]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %a = sub nsw <2 x i32> %x, %y
+ %b = sub nsw <2 x i32> %y, %x
+ %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %a
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+ ret <2 x i32> %cond18
+}
+
+define i32 @abs_nabs_x01(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x01(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x02(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x02(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x03(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x03(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x04(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x04(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x05(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x05(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x06(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x06(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x07(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x07(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x08(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x08(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x09(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x09(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x10(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x11(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x12(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x12(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x13(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x13(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x14(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x14(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x15(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x15(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @abs_nabs_x16(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+; abs(nabs(-x)) -> abs(-x) -> abs(x)
+define i32 @abs_nabs_x17(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x17(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %sub = sub nsw i32 0, %x
+ %cmp = icmp sgt i32 %sub, -1
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; abs(nabs(x - y)) -> abs(x - y)
+define i32 @abs_nabs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_nabs_x18(
+; CHECK-NEXT: [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[NEGA]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %a = sub nsw i32 %x, %y
+ %b = sub nsw i32 %y, %x
+ %cmp = icmp sgt i32 %a, -1
+ %cond = select i1 %cmp, i32 %b, i32 %a
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; abs(nabs(-x)) -> abs(-x) -> abs(x)
+define <2 x i32> @abs_nabs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_nabs_x01_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+; abs(nabs(x - y)) -> abs(x - y)
+define <2 x i32> @abs_nabs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @abs_nabs_x02_vec(
+; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT: [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NEGA]], <2 x i32> [[A]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %a = sub nsw <2 x i32> %x, %y
+ %b = sub nsw <2 x i32> %y, %x
+ %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %a
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+ ret <2 x i32> %cond18
+}
+
+define i32 @nabs_abs_x01(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x01(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x02(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x02(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x03(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x03(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x04(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x04(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x05(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x05(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x06(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x06(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x07(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x07(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x08(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x08(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, 0
+ %sub9 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x09(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x09(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x10(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x11(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x12(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x12(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 0
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x13(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x13(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x14(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x14(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %x, i32 %sub
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x15(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x15(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+define i32 @nabs_abs_x16(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp slt i32 %x, 1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp slt i32 %cond, 1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+ ret i32 %cond18
+}
+
+; nabs(abs(-x)) -> nabs(-x) -> nabs(x)
+define i32 @nabs_abs_x17(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x17(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %sub = sub nsw i32 0, %x
+ %cmp = icmp sgt i32 %sub, -1
+ %cond = select i1 %cmp, i32 %sub, i32 %x
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+ ret i32 %cond18
+}
+
+; nabs(abs(x - y)) -> nabs(x - y)
+define i32 @nabs_abs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @nabs_abs_x18(
+; CHECK-NEXT: [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[NEGA]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %a = sub nsw i32 %x, %y
+ %b = sub nsw i32 %y, %x
+ %cmp = icmp sgt i32 %a, -1
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+ ret i32 %cond18
+}
+
+; nabs(abs(-x)) -> nabs(-x) -> nabs(x)
+define <2 x i32> @nabs_abs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @nabs_abs_x01_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[X]], <2 x i32> [[SUB]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+ ret <2 x i32> %cond18
+}
+
+; nabs(abs(x - y)) -> nabs(x - y)
+define <2 x i32> @nabs_abs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nabs_abs_x02_vec(
+; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT: [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[A]], <2 x i32> [[NEGA]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %a = sub nsw <2 x i32> %x, %y
+ %b = sub nsw <2 x i32> %y, %x
+ %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+ ret <2 x i32> %cond18
+}
diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll
new file mode 100644
index 00000000000..105c9efa089
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @x(i32 %a, i32 %b) {
+; CHECK-LABEL: @x(
+; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24
+; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]]
+; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[N]], 1
+; CHECK-NEXT: [[P:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT: ret double [[P]]
+;
+ %m = lshr i32 %a, 24
+ %n = and i32 %m, %b
+ %o = sitofp i32 %n to double
+ %p = fadd double %o, 1.0
+ ret double %p
+}
+
+define double @test(i32 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], 1
+; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT: ret double [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + 1 doesn't overflow
+ %a_and = and i32 %a, 1073741823
+ %a_and_fp = sitofp i32 %a_and to double
+ %res = fadd double %a_and_fp, 1.0
+ ret double %res
+}
+
+define float @test_neg(i32 %a) {
+; CHECK-LABEL: @test_neg(
+; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float
+; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00
+; CHECK-NEXT: ret float [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + 1 doesn't overflow
+ %a_and = and i32 %a, 1073741823
+ %a_and_fp = sitofp i32 %a_and to float
+ %res = fadd float %a_and_fp, 1.0
+ ret float %res
+}
+
+define double @test_2(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_2(
+; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
+; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]]
+; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT: ret double [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+ %a_and = and i32 %a, 1073741823
+ %b_and = and i32 %b, 1073741823
+
+ %a_and_fp = sitofp i32 %a_and to double
+ %b_and_fp = sitofp i32 %b_and to double
+
+ %res = fadd double %a_and_fp, %b_and_fp
+ ret double %res
+}
+
+define float @test_2_neg(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_2_neg(
+; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
+; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float
+; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float
+; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+ %a_and = and i32 %a, 1073741823
+ %b_and = and i32 %b, 1073741823
+
+ %a_and_fp = sitofp i32 %a_and to float
+ %b_and_fp = sitofp i32 %b_and to float
+
+ %res = fadd float %a_and_fp, %b_and_fp
+ ret float %res
+}
+
+; This test demonstrates an overly conservative legality check. The float addition
+; can be replaced with the integer addition because the result of the operation
+; can be represented in float, but we don't do that now.
+define float @test_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_3(
+; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24
+; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]]
+; CHECK-NEXT: [[O:%.*]] = sitofp i32 [[N]] to float
+; CHECK-NEXT: [[P:%.*]] = fadd float [[O]], 1.000000e+00
+; CHECK-NEXT: ret float [[P]]
+;
+ %m = lshr i32 %a, 24
+ %n = and i32 %m, %b
+ %o = sitofp i32 %n to float
+ %p = fadd float %o, 1.0
+ ret float %p
+}
+
+define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @test_4(
+; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
+; CHECK-NEXT: [[RES:%.*]] = sitofp <4 x i32> [[ADDCONV]] to <4 x double>
+; CHECK-NEXT: ret <4 x double> [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+ %a_and = and <4 x i32> %a, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+ %b_and = and <4 x i32> %b, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+
+ %a_and_fp = sitofp <4 x i32> %a_and to <4 x double>
+ %b_and_fp = sitofp <4 x i32> %b_and to <4 x double>
+
+ %res = fadd <4 x double> %a_and_fp, %b_and_fp
+ ret <4 x double> %res
+}
+
+define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @test_4_neg(
+; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp <4 x i32> [[A_AND]] to <4 x float>
+; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp <4 x i32> [[B_AND]] to <4 x float>
+; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]]
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+ %a_and = and <4 x i32> %a, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+ %b_and = and <4 x i32> %b, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+
+ %a_and_fp = sitofp <4 x i32> %a_and to <4 x float>
+ %b_and_fp = sitofp <4 x i32> %b_and to <4 x float>
+
+ %res = fadd <4 x float> %a_and_fp, %b_and_fp
+ ret <4 x float> %res
+}
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
new file mode 100644
index 00000000000..0f805e856f8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -0,0 +1,980 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @select_0_or_1_from_bool(i1 %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT: [[ADD:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %ext = sext i1 %x to i32
+ %add = add i32 %ext, 1
+ ret i32 %add
+}
+
+define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[ADD]]
+;
+ %ext = sext <2 x i1> %x to <2 x i32>
+ %add = add <2 x i32> %ext, <i32 1, i32 1>
+ ret <2 x i32> %add
+}
+
+define i32 @select_C_minus_1_or_C_from_bool(i1 %x) {
+; CHECK-LABEL: @select_C_minus_1_or_C_from_bool(
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[X:%.*]], i32 41, i32 42
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %ext = sext i1 %x to i32
+ %add = add i32 %ext, 42
+ ret i32 %add
+}
+
+define <2 x i32> @select_C_minus_1_or_C_from_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @select_C_minus_1_or_C_from_bool_vec(
+; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 41, i32 42>, <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT: ret <2 x i32> [[ADD]]
+;
+ %ext = sext <2 x i1> %x to <2 x i32>
+ %add = add <2 x i32> %ext, <i32 42, i32 43>
+ ret <2 x i32> %add
+}
+
+; This is an 'andn' of the low bit.
+
+define i32 @flip_and_mask(i32 %x) {
+; CHECK-LABEL: @flip_and_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[INC:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[INC]]
+;
+ %shl = shl i32 %x, 31
+ %shr = ashr i32 %shl, 31
+ %inc = add i32 %shr, 1
+ ret i32 %inc
+}
+
+define <2 x i8> @flip_and_mask_splat(<2 x i8> %x) {
+; CHECK-LABEL: @flip_and_mask_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[INC:%.*]] = xor <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[INC]]
+;
+ %shl = shl <2 x i8> %x, <i8 7, i8 7>
+ %shr = ashr <2 x i8> %shl, <i8 7, i8 7>
+ %inc = add <2 x i8> %shr, <i8 1, i8 1>
+ ret <2 x i8> %inc
+}
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = add i32 %A, 0
+ ret i32 %B
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = add i32 %A, 5
+ %C = add i32 %B, -5
+ ret i32 %C
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = add i32 %A, 5
+ %C = sub i32 %B, 5
+ ret i32 %C
+}
+
+; D = B + -A = B - A
+define i32 @test4(i32 %A, i32 %B) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[D:%.*]] = sub i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = sub i32 0, %A
+ %D = add i32 %B, %C
+ ret i32 %D
+}
+
+; D = -A + B = B - A
+define i32 @test5(i32 %A, i32 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[D:%.*]] = sub i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = sub i32 0, %A
+ %D = add i32 %C, %B
+ ret i32 %D
+}
+
+define <2 x i8> @neg_op0_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @neg_op0_vec_undef_elt(
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %nega = sub <2 x i8> <i8 0, i8 undef>, %a
+ %r = add <2 x i8> %nega, %b
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @neg_neg_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @neg_neg_vec_undef_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %nega = sub <2 x i8> <i8 undef, i8 0>, %a
+ %negb = sub <2 x i8> <i8 undef, i8 0>, %b
+ %r = add <2 x i8> %nega, %negb
+ ret <2 x i8> %r
+}
+
+; C = 7*A+A == 8*A == A << 3
+define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 7, %A
+ %C = add i32 %B, %A
+ ret i32 %C
+}
+
+; C = A+7*A == 8*A == A << 3
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 7, %A
+ %C = add i32 %A, %B
+ ret i32 %C
+}
+
+; (A & C1)+(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+define i32 @test8(i32 %A, i32 %B) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[A1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT: [[C:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A1 = and i32 %A, 7
+ %B1 = and i32 %B, 128
+ %C = add i32 %A1, %B1
+ ret i32 %C
+}
+
+define i32 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[A:%.*]], 5
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 %A, 4
+ %C = add i32 %B, %B
+ ret i32 %C
+}
+
+; a != -b
+define i1 @test10(i8 %a, i8 %b) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[ADD:%.*]] = sub i8 0, [[B:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[ADD]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %add = add i8 %a, %b
+ %c = icmp ne i8 %add, 0
+ ret i1 %c
+}
+
+define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test10vec(
+; CHECK-NEXT: [[C:%.*]] = sub <2 x i8> zeroinitializer, [[B:%.*]]
+; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> [[C]], [[A:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = add <2 x i8> %a, %b
+ %d = icmp ne <2 x i8> %c, zeroinitializer
+ ret <2 x i1> %d
+}
+
+define i1 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = add i8 %A, -1
+ %c = icmp ne i8 %B, 0
+ ret i1 %c
+}
+
+define <2 x i1> @test11vec(<2 x i8> %a) {
+; CHECK-LABEL: @test11vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add <2 x i8> %a, <i8 -1, i8 -1>
+ %c = icmp ne <2 x i8> %b, zeroinitializer
+ ret <2 x i1> %c
+}
+
+; Should be transformed into shl A, 1?
+
+define i32 @test12(i32 %A, i32 %B) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: br label [[X:%.*]]
+; CHECK: X:
+; CHECK-NEXT: [[C_OK:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[D:%.*]] = add i32 [[C_OK]], [[A]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C_OK = add i32 %B, %A
+ br label %X
+
+X: ; preds = %0
+ %D = add i32 %C_OK, %A
+ ret i32 %D
+}
+
+;; TODO: shl A, 1?
+define i32 @test13(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[D_OK:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[E_OK:%.*]] = add i32 [[D_OK]], [[C:%.*]]
+; CHECK-NEXT: [[F:%.*]] = add i32 [[E_OK]], [[A]]
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %D_OK = add i32 %A, %B
+ %E_OK = add i32 %D_OK, %C
+ %F = add i32 %E_OK, %A
+ ret i32 %F
+}
+
+define i32 @test14(i32 %offset, i32 %difference) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP_2:%.*]] = and i32 [[DIFFERENCE:%.*]], 3
+; CHECK-NEXT: [[TMP_3_OK:%.*]] = add i32 [[TMP_2]], [[OFFSET:%.*]]
+; CHECK-NEXT: [[TMP_5_MASK:%.*]] = and i32 [[DIFFERENCE]], -4
+; CHECK-NEXT: [[TMP_8:%.*]] = add i32 [[TMP_3_OK]], [[TMP_5_MASK]]
+; CHECK-NEXT: ret i32 [[TMP_8]]
+;
+ %tmp.2 = and i32 %difference, 3
+ %tmp.3_OK = add i32 %tmp.2, %offset
+ %tmp.5.mask = and i32 %difference, -4
+ ; == add %offset, %difference
+ %tmp.8 = add i32 %tmp.3_OK, %tmp.5.mask
+ ret i32 %tmp.8
+}
+
+; Only one bit set
+define i8 @test15(i8 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[C:%.*]] = and i8 [[A:%.*]], 16
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = add i8 %A, -64
+ %C = and i8 %B, 16
+ ret i8 %C
+}
+
+; Only one bit set
+define i8 @test16(i8 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], 16
+; CHECK-NEXT: [[C:%.*]] = xor i8 [[B]], 16
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = add i8 %A, 16
+ %C = and i8 %B, 16
+ ret i8 %C
+}
+
+define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[C:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = xor i32 %A, -1
+ %C = add i32 %B, 1
+ ret i32 %C
+}
+
+define i8 @test18(i8 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[C:%.*]] = sub i8 16, [[A:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = xor i8 %A, -1
+ %C = add i8 %B, 17
+ ret i8 %C
+}
+
+define <2 x i64> @test18vec(<2 x i64> %A) {
+; CHECK-LABEL: @test18vec(
+; CHECK-NEXT: [[ADD:%.*]] = sub <2 x i64> <i64 1, i64 2>, [[A:%.*]]
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %xor = xor <2 x i64> %A, <i64 -1, i64 -1>
+ %add = add <2 x i64> %xor, <i64 2, i64 3>
+ ret <2 x i64> %add
+}
+
+define i32 @test19(i1 %C) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 1123, i32 133
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = add i32 %A, 123
+ ret i32 %V
+}
+
+define <2 x i32> @test19vec(i1 %C) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = add <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %V
+}
+
+; This is an InstSimplify fold, but test it here to make sure that
+; InstCombine does not prevent the fold.
+; With NSW, add of sign bit -> or of sign bit.
+
+define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %y = xor i32 %x, -2147483648
+ %z = add nsw i32 %y, -2147483648
+ ret i32 %z
+}
+
+define i32 @xor_sign_bit(i32 %x) {
+; CHECK-LABEL: @xor_sign_bit(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], -2147483606
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %xor = xor i32 %x, 2147483648
+ %add = add i32 %xor, 42
+ ret i32 %add
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nsw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nsw_signbit(
+; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], -128
+; CHECK-NEXT: ret i8 [[Y]]
+;
+ %y = add nsw i8 %x, -128
+ ret i8 %y
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nuw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nuw_signbit(
+; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], -128
+; CHECK-NEXT: ret i8 [[Y]]
+;
+ %y = add nuw i8 %x, 128
+ ret i8 %y
+}
+
+define i32 @add_nsw_sext_add(i8 %x) {
+; CHECK-LABEL: @add_nsw_sext_add(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP1]], 398
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %add = add nsw i8 %x, 42
+ %ext = sext i8 %add to i32
+ %r = add i32 %ext, 356
+ ret i32 %r
+}
+
+; Negative test - an extra use of the sext means an increase in instructions.
+
+define i32 @add_nsw_sext_add_extra_use_1(i8 %x, i32* %p) {
+; CHECK-LABEL: @add_nsw_sext_add_extra_use_1(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], 42
+; CHECK-NEXT: [[EXT:%.*]] = sext i8 [[ADD]] to i32
+; CHECK-NEXT: store i32 [[EXT]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[EXT]], 356
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %add = add nsw i8 %x, 42
+ %ext = sext i8 %add to i32
+ store i32 %ext, i32* %p
+ %r = add i32 %ext, 356
+ ret i32 %r
+}
+
+define <2 x i32> @add_nsw_sext_add_vec_extra_use_2(<2 x i8> %x, <2 x i8>* %p) {
+; CHECK-LABEL: @add_nsw_sext_add_vec_extra_use_2(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i8> [[X:%.*]], <i8 42, i8 -5>
+; CHECK-NEXT: store <2 x i8> [[ADD]], <2 x i8>* [[P:%.*]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 398, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %add = add nsw <2 x i8> %x, <i8 42, i8 -5>
+ store <2 x i8> %add, <2 x i8>* %p
+ %ext = sext <2 x i8> %add to <2 x i32>
+ %r = add <2 x i32> %ext, <i32 356, i32 12>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @add_nuw_zext_add_vec(<2 x i16> %x) {
+; CHECK-LABEL: @add_nuw_zext_add_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 65850, i32 -7>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %add = add nuw <2 x i16> %x, <i16 -42, i16 5>
+ %ext = zext <2 x i16> %add to <2 x i32>
+ %r = add <2 x i32> %ext, <i32 356, i32 -12>
+ ret <2 x i32> %r
+}
+
+; Negative test - an extra use of the zext means an increase in instructions.
+
+define i64 @add_nuw_zext_add_extra_use_1(i8 %x, i64* %p) {
+; CHECK-LABEL: @add_nuw_zext_add_extra_use_1(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], 42
+; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[ADD]] to i64
+; CHECK-NEXT: store i64 [[EXT]], i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i64 [[EXT]], 356
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %add = add nuw i8 %x, 42
+ %ext = zext i8 %add to i64
+ store i64 %ext, i64* %p
+ %r = add i64 %ext, 356
+ ret i64 %r
+}
+
+define i64 @add_nuw_zext_add_extra_use_2(i8 %x, i8* %p) {
+; CHECK-LABEL: @add_nuw_zext_add_extra_use_2(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], 42
+; CHECK-NEXT: store i8 [[ADD]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i64 [[TMP1]], -314
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %add = add nuw i8 %x, 42
+ store i8 %add, i8* %p
+ %ext = zext i8 %add to i64
+ %r = add i64 %ext, -356
+ ret i64 %r
+}
+
+define i1 @test21(i32 %x) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[Y:%.*]] = icmp eq i32 [[X:%.*]], 119
+; CHECK-NEXT: ret i1 [[Y]]
+;
+ %t = add i32 %x, 4
+ %y = icmp eq i32 %t, 123
+ ret i1 %y
+}
+
+define <2 x i1> @test21vec(<2 x i32> %x) {
+; CHECK-LABEL: @test21vec(
+; CHECK-NEXT: [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 119, i32 119>
+; CHECK-NEXT: ret <2 x i1> [[Y]]
+;
+ %t = add <2 x i32> %x, <i32 4, i32 4>
+ %y = icmp eq <2 x i32> %t, <i32 123, i32 123>
+ ret <2 x i1> %y
+}
+
+define i32 @test22(i32 %V) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: switch i32 [[V:%.*]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT: i32 10, label [[LAB1:%.*]]
+; CHECK-NEXT: i32 20, label [[LAB2:%.*]]
+; CHECK-NEXT: ]
+; CHECK: Default:
+; CHECK-NEXT: ret i32 123
+; CHECK: Lab1:
+; CHECK-NEXT: ret i32 12312
+; CHECK: Lab2:
+; CHECK-NEXT: ret i32 1231231
+;
+ %V2 = add i32 %V, 10
+ switch i32 %V2, label %Default [
+ i32 20, label %Lab1
+ i32 30, label %Lab2
+ ]
+
+Default: ; preds = %0
+ ret i32 123
+
+Lab1: ; preds = %0
+ ret i32 12312
+
+Lab2: ; preds = %0
+ ret i32 1231231
+}
+
+define i32 @test23(i1 %C, i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[ENDIF:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[ENDIF]]
+; CHECK: endif:
+; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 2, [[ELSE]] ]
+; CHECK-NEXT: ret i32 [[B_0]]
+;
+entry:
+ br i1 %C, label %endif, label %else
+
+else: ; preds = %entry
+ br label %endif
+
+endif: ; preds = %else, %entry
+ %b.0 = phi i32 [ 0, %entry ], [ 1, %else ]
+ %tmp.4 = add i32 %b.0, 1
+ ret i32 %tmp.4
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = add i32 %A, 1
+ %C = shl i32 %B, 1
+ %D = sub i32 %C, 2
+ ret i32 %D
+}
+
+define i64 @test25(i64 %Y) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[TMP_8:%.*]] = shl i64 [[Y:%.*]], 3
+; CHECK-NEXT: ret i64 [[TMP_8]]
+;
+ %tmp.4 = shl i64 %Y, 2
+ %tmp.12 = shl i64 %Y, 2
+ %tmp.8 = add i64 %tmp.4, %tmp.12
+ ret i64 %tmp.8
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %C = add i32 %A, %B
+ %D = sub i32 %C, %B
+ ret i32 %D
+}
+
+; Fold add through select.
+define i32 @test27(i1 %C, i32 %X, i32 %Y) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[C_UPGRD_1_V:%.*]] = select i1 [[C:%.*]], i32 [[X:%.*]], i32 123
+; CHECK-NEXT: ret i32 [[C_UPGRD_1_V]]
+;
+ %A = add i32 %X, %Y
+ %B = add i32 %Y, 123
+ %C.upgrd.1 = select i1 %C, i32 %A, i32 %B
+ %D = sub i32 %C.upgrd.1, %Y
+ ret i32 %D
+}
+
+define i32 @test28(i32 %X) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[Z:%.*]] = sub i32 -1192, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = add i32 %X, 1234
+ %Z = sub i32 42, %Y
+ ret i32 %Z
+}
+
+define i32 @test29(i32 %x, i32 %y) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[TMP_2:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP_7:%.*]] = and i32 [[X]], 63
+; CHECK-NEXT: [[TMP_9:%.*]] = and i32 [[TMP_2]], -64
+; CHECK-NEXT: [[TMP_10:%.*]] = or i32 [[TMP_7]], [[TMP_9]]
+; CHECK-NEXT: ret i32 [[TMP_10]]
+;
+ %tmp.2 = sub i32 %x, %y
+ %tmp.2.mask = and i32 %tmp.2, 63
+ %tmp.6 = add i32 %tmp.2.mask, %y
+ %tmp.7 = and i32 %tmp.6, 63
+ %tmp.9 = and i32 %tmp.2, -64
+ %tmp.10 = or i32 %tmp.7, %tmp.9
+ ret i32 %tmp.10
+}
+
+; Add of sign bit -> xor of sign bit.
+define i64 @test30(i64 %x) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i64 [[X:%.*]]
+;
+ %tmp.2 = xor i64 %x, -9223372036854775808
+ %tmp.4 = add i64 %tmp.2, -9223372036854775808
+ ret i64 %tmp.4
+}
+
+define i32 @test31(i32 %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[A:%.*]], 5
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %B = add i32 %A, 4
+ %C = mul i32 %B, 5
+ %D = sub i32 %C, 20
+ ret i32 %D
+}
+
+define i32 @test32(i32 %A) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = add i32 %A, 4
+ %C = shl i32 %B, 2
+ %D = sub i32 %C, 16
+ ret i32 %D
+}
+
+define i8 @test33(i8 %A) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[C:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = and i8 %A, -2
+ %C = add i8 %B, 1
+ ret i8 %C
+}
+
+define i8 @test34(i8 %A) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[C:%.*]] = and i8 [[A:%.*]], 12
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = add i8 %A, 64
+ %C = and i8 %B, 12
+ ret i8 %C
+}
+
+; If all bits affected by the add are included
+; in the mask, do the add before the mask op.
+
+define i8 @masked_add(i8 %x) {
+; CHECK-LABEL: @masked_add(
+; CHECK-NEXT: [[AND1:%.*]] = add i8 [[X:%.*]], 96
+; CHECK-NEXT: [[R:%.*]] = and i8 [[AND1]], -16
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %and = and i8 %x, 240 ; 0xf0
+ %r = add i8 %and, 96 ; 0x60
+ ret i8 %r
+}
+
+define <2 x i8> @masked_add_splat(<2 x i8> %x) {
+; CHECK-LABEL: @masked_add_splat(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[AND]], <i8 64, i8 64>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %and = and <2 x i8> %x, <i8 192, i8 192> ; 0xc0
+ %r = add <2 x i8> %and, <i8 64, i8 64> ; 0x40
+ ret <2 x i8> %r
+}
+
+define i8 @not_masked_add(i8 %x) {
+; CHECK-LABEL: @not_masked_add(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 112
+; CHECK-NEXT: [[R:%.*]] = add nuw i8 [[AND]], 96
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %and = and i8 %x, 112 ; 0x70
+ %r = add i8 %and, 96 ; 0x60
+ ret i8 %r
+}
+
+define i32 @test35(i32 %a) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: ret i32 -1
+;
+ %tmpnot = xor i32 %a, -1
+ %tmp2 = add i32 %tmpnot, %a
+ ret i32 %tmp2
+}
+
+define i32 @test36(i32 %a) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: ret i32 0
+;
+ %x = and i32 %a, -2
+ %y = and i32 %a, -126
+ %z = add i32 %x, %y
+ %q = and i32 %z, 1 ; always zero
+ ret i32 %q
+}
+
+define i1 @test37(i32 %a, i32 %b) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %a, %b
+ %cmp = icmp eq i32 %add, %a
+ ret i1 %cmp
+}
+
+define i1 @test38(i32 %a, i32 %b) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %a, %b
+ %cmp = icmp eq i32 %add, %b
+ ret i1 %cmp
+}
+
+define i1 @test39(i32 %a, i32 %b) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %b, %a
+ %cmp = icmp eq i32 %add, %a
+ ret i1 %cmp
+}
+
+define i1 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %b, %a
+ %cmp = icmp eq i32 %add, %b
+ ret i1 %cmp
+}
+
+; (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
+
+define i64 @test41(i32 %a) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[A:%.*]], 15
+; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %add = add nuw i32 %a, 16
+ %zext = zext i32 %add to i64
+ %sub = add i64 %zext, -1
+ ret i64 %sub
+}
+
+; (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
+
+define <2 x i64> @test41vec(<2 x i32> %a) {
+; CHECK-LABEL: @test41vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 15, i32 15>
+; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %add = add nuw <2 x i32> %a, <i32 16, i32 16>
+ %zext = zext <2 x i32> %add to <2 x i64>
+ %sub = add <2 x i64> %zext, <i64 -1, i64 -1>
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) {
+; CHECK-LABEL: @test41vec_and_multiuse(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64>
+; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i64> [[ZEXT]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[SUB]], [[ZEXT]]
+; CHECK-NEXT: ret <2 x i64> [[EXTRAUSE]]
+;
+ %add = add nuw <2 x i32> %a, <i32 16, i32 16>
+ %zext = zext <2 x i32> %add to <2 x i64>
+ %sub = add <2 x i64> %zext, <i64 -1, i64 -1>
+ %extrause = add <2 x i64> %zext, %sub
+ ret <2 x i64> %extrause
+}
+
+define i32 @test42(i1 %C) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 1123, i32 133
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = add i32 123, %A
+ ret i32 %V
+}
+
+define <2 x i32> @test42vec(i1 %C) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = add <2 x i32> <i32 123, i32 123>, %A
+ ret <2 x i32> %V
+}
+
+define <2 x i32> @test42vec2(i1 %C) {
+; CHECK-LABEL: @test42vec2(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 2833>, <2 x i32> <i32 133, i32 363>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+ %V = add <2 x i32> <i32 123, i32 333>, %A
+ ret <2 x i32> %V
+}
+
+define i32 @test55(i1 %which) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1123, [[ENTRY:%.*]] ], [ 133, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = add i32 123, %A
+ ret i32 %value
+}
+
+define <2 x i32> @test43vec(i1 %which) {
+; CHECK-LABEL: @test43vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 1123>, [[ENTRY:%.*]] ], [ <i32 133, i32 133>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = add <2 x i32> <i32 123, i32 123>, %A
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test43vec2(i1 %which) {
+; CHECK-LABEL: @test43vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 2833>, [[ENTRY:%.*]] ], [ <i32 133, i32 363>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = add <2 x i32> <i32 123, i32 333>, %A
+ ret <2 x i32> %value
+}
+
+; E = (A + 1) + ~B = A - B
+define i32 @add_not_increment(i32 %A, i32 %B) {
+; CHECK-LABEL: @add_not_increment(
+; CHECK-NEXT: [[E:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %C = xor i32 %B, -1
+ %D = add i32 %A, 1
+ %E = add i32 %D, %C
+ ret i32 %E
+}
+
+; E = (A + 1) + ~B = A - B
+define <2 x i32> @add_not_increment_vec(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: @add_not_increment_vec(
+; CHECK-NEXT: [[E:%.*]] = sub <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %C = xor <2 x i32> %B, <i32 -1, i32 -1>
+ %D = add <2 x i32> %A, <i32 1, i32 1>
+ %E = add <2 x i32> %D, %C
+ ret <2 x i32> %E
+}
+
+; E = ~B + (1 + A) = A - B
+define i32 @add_not_increment_commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @add_not_increment_commuted(
+; CHECK-NEXT: [[E:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %C = xor i32 %B, -1
+ %D = add i32 %A, 1
+ %E = add i32 %C, %D
+ ret i32 %E
+}
+
+; E = (A + ~B) + 1 = A - B
+define i32 @add_to_sub(i32 %M, i32 %B) {
+; CHECK-LABEL: @add_to_sub(
+; CHECK-NEXT: [[A:%.*]] = mul i32 [[M:%.*]], 42
+; CHECK-NEXT: [[C:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[C]]
+; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], 1
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %A = mul i32 %M, 42 ; thwart complexity-based ordering
+ %C = xor i32 %B, -1
+ %D = add i32 %A, %C
+ %E = add i32 %D, 1
+ ret i32 %E
+}
+
+; E = (~B + A) + 1 = A - B
+define i32 @add_to_sub2(i32 %A, i32 %M) {
+; CHECK-LABEL: @add_to_sub2(
+; CHECK-NEXT: [[B:%.*]] = mul i32 [[M:%.*]], 42
+; CHECK-NEXT: [[C:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[D:%.*]] = add i32 [[C]], [[A:%.*]]
+; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], 1
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %B = mul i32 %M, 42 ; thwart complexity-based ordering
+ %C = xor i32 %B, -1
+ %D = add i32 %C, %A
+ %E = add i32 %D, 1
+ ret i32 %E
+}
diff --git a/llvm/test/Transforms/InstCombine/add2.ll b/llvm/test/Transforms/InstCombine/add2.ll
new file mode 100644
index 00000000000..ed99936e081
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add2.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @test1(i64 %A, i32 %B) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[A:%.*]], 123
+; CHECK-NEXT: ret i64 [[TMP6]]
+;
+ %tmp12 = zext i32 %B to i64
+ %tmp3 = shl i64 %tmp12, 32
+ %tmp5 = add i64 %tmp3, %A
+ %tmp6 = and i64 %tmp5, 123
+ ret i64 %tmp6
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[F:%.*]] = and i32 [[A:%.*]], 39
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %B = and i32 %A, 7
+ %C = and i32 %A, 32
+ %F = add i32 %B, %C
+ ret i32 %F
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 128
+; CHECK-NEXT: [[C:%.*]] = lshr i32 [[A]], 30
+; CHECK-NEXT: [[F:%.*]] = or i32 [[B]], [[C]]
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %B = and i32 %A, 128
+ %C = lshr i32 %A, 30
+ %F = add i32 %B, %C
+ ret i32 %F
+}
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[B:%.*]] = shl nuw i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = add nuw i32 %A, %A
+ ret i32 %B
+}
+
+define <2 x i1> @test5(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[ADD:%.*]] = xor <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[ADD]]
+;
+ %add = add <2 x i1> %A, %B
+ ret <2 x i1> %add
+}
+
+define <2 x i64> @test6(<2 x i64> %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[ADD:%.*]] = mul <2 x i64> [[A:%.*]], <i64 5, i64 9>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %shl = shl <2 x i64> %A, <i64 2, i64 3>
+ %add = add <2 x i64> %shl, %A
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @test7(<2 x i64> %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[ADD:%.*]] = mul <2 x i64> [[A:%.*]], <i64 7, i64 12>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %shl = shl <2 x i64> %A, <i64 2, i64 3>
+ %mul = mul <2 x i64> %A, <i64 3, i64 4>
+ %add = add <2 x i64> %shl, %mul
+ ret <2 x i64> %add
+}
+
+define i16 @test9(i16 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[D:%.*]] = mul i16 [[A:%.*]], -32767
+; CHECK-NEXT: ret i16 [[D]]
+;
+ %b = mul i16 %a, 2
+ %c = mul i16 %a, 32767
+ %d = add i16 %b, %c
+ ret i16 %d
+}
+
+; y + (~((x >> 3) & 0x55555555) + 1) -> y - ((x >> 3) & 0x55555555)
+define i32 @test10(i32 %x, i32 %y) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %shr = ashr i32 %x, 3
+ %shr.not = or i32 %shr, -1431655766
+ %neg = xor i32 %shr.not, 1431655765
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+}
+
+; y + (~(x & 0x55555555) + 1) -> y - (x & 0x55555555)
+define i32 @test11(i32 %x, i32 %y) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655765
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %x.not = or i32 %x, -1431655766
+ %neg = xor i32 %x.not, 1431655765
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+}
+
+; (y + 1) + ~(x & 0x55555555) -> y - (x & 0x55555555)
+define i32 @test12(i32 %x, i32 %y) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655765
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %add = add nsw i32 %y, 1
+ %x.not = or i32 %x, -1431655766
+ %neg = xor i32 %x.not, 1431655765
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+}
+
+; y + (~(x & 0x55555556) + 1) -> y - (x & 0x55555556)
+define i32 @test13(i32 %x, i32 %y) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655766
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %x.not = or i32 %x, -1431655767
+ %neg = xor i32 %x.not, 1431655766
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+}
+
+; (y + 1) + ~(x & 0x55555556) -> y - (x & 0x55555556)
+define i32 @test14(i32 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655766
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %add = add nsw i32 %y, 1
+ %x.not = or i32 %x, -1431655767
+ %neg = xor i32 %x.not, 1431655766
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+}
+
+; y + (~(x | 0x55555556) + 1) -> y - (x | 0x55555556)
+define i32 @test15(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655766
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %x.not = and i32 %x, -1431655767
+ %neg = xor i32 %x.not, -1431655767
+ %add = add i32 %y, 1
+ %add1 = add i32 %add, %neg
+ ret i32 %add1
+}
+
+; (y + 1) + ~(x | 0x55555556) -> y - (x | 0x55555556)
+define i32 @test16(i32 %x, i32 %y) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655766
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %add = add nsw i32 %y, 1
+ %x.not = and i32 %x, -1431655767
+ %neg = xor i32 %x.not, -1431655767
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+}
+
+; y + (~(x | 0x55555555) + 1) -> y - (x | 0x55555555)
+define i32 @test17(i32 %x, i32 %y) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655765
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %x.not = and i32 %x, -1431655766
+ %add2 = xor i32 %x.not, -1431655765
+ %add1 = add nsw i32 %add2, %y
+ ret i32 %add1
+}
+
+; (y + 1) + ~(x | 0x55555555) -> y - (x | 0x55555555)
+define i32 @test18(i32 %x, i32 %y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655765
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %add = add nsw i32 %y, 1
+ %x.not = and i32 %x, -1431655766
+ %neg = xor i32 %x.not, -1431655766
+ %add1 = add nsw i32 %add, %neg
+ ret i32 %add1
+}
+
+define i16 @add_nsw_mul_nsw(i16 %x) {
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT: [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 3
+; CHECK-NEXT: ret i16 [[ADD2]]
+;
+ %add1 = add nsw i16 %x, %x
+ %add2 = add nsw i16 %add1, %x
+ ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_1(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT: [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 9
+; CHECK-NEXT: ret i16 [[ADD2]]
+;
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %x, %mul1
+ ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_2(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT: [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 9
+; CHECK-NEXT: ret i16 [[ADD2]]
+;
+ %mul1 = mul nsw i16 %x, 8
+ %add2 = add nsw i16 %mul1, %x
+ ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_3(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_3(
+; CHECK-NEXT: [[ADD:%.*]] = mul i16 [[A:%.*]], 5
+; CHECK-NEXT: ret i16 [[ADD]]
+;
+ %mul1 = mul i16 %a, 2
+ %mul2 = mul i16 %a, 3
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+}
+
+define i16 @mul_add_to_mul_4(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_4(
+; CHECK-NEXT: [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 9
+; CHECK-NEXT: ret i16 [[ADD]]
+;
+ %mul1 = mul nsw i16 %a, 2
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+}
+
+define i16 @mul_add_to_mul_5(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_5(
+; CHECK-NEXT: [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 10
+; CHECK-NEXT: ret i16 [[ADD]]
+;
+ %mul1 = mul nsw i16 %a, 3
+ %mul2 = mul nsw i16 %a, 7
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+}
+
+define i32 @mul_add_to_mul_6(i32 %x, i32 %y) {
+; CHECK-LABEL: @mul_add_to_mul_6(
+; CHECK-NEXT: [[MUL1:%.*]] = mul nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = mul nsw i32 [[MUL1]], 6
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %mul1 = mul nsw i32 %x, %y
+ %mul2 = mul nsw i32 %mul1, 5
+ %add = add nsw i32 %mul1, %mul2
+ ret i32 %add
+}
+
+define i16 @mul_add_to_mul_7(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_7(
+; CHECK-NEXT: [[ADD2:%.*]] = shl i16 [[X:%.*]], 15
+; CHECK-NEXT: ret i16 [[ADD2]]
+;
+ %mul1 = mul nsw i16 %x, 32767
+ %add2 = add nsw i16 %x, %mul1
+ ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_8(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_8(
+; CHECK-NEXT: [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 32767
+; CHECK-NEXT: ret i16 [[ADD]]
+;
+ %mul1 = mul nsw i16 %a, 16383
+ %mul2 = mul nsw i16 %a, 16384
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+}
+
+define i16 @mul_add_to_mul_9(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_9(
+; CHECK-NEXT: [[ADD:%.*]] = shl i16 [[A:%.*]], 15
+; CHECK-NEXT: ret i16 [[ADD]]
+;
+ %mul1 = mul nsw i16 %a, 16384
+ %mul2 = mul nsw i16 %a, 16384
+ %add = add nsw i16 %mul1, %mul2
+ ret i16 %add
+}
+
+; This test and the next one verify that when range metadata is attached to
+; llvm.cttz, ValueTracking correctly intersects the range specified by the
+; metadata with the range implied by the intrinsic.
+;
+; In this test, the range specified by the metadata is more strict. Therefore,
+; ValueTracking uses that range.
+define i16 @add_cttz(i16 %a) {
+; CHECK-LABEL: @add_cttz(
+; CHECK-NEXT: [[CTTZ:%.*]] = call i16 @llvm.cttz.i16(i16 [[A:%.*]], i1 true), !range !0
+; CHECK-NEXT: [[B:%.*]] = or i16 [[CTTZ]], -8
+; CHECK-NEXT: ret i16 [[B]]
+;
+ ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+ ; is in [0, 16). The range metadata indicates the value returned is in [0, 8).
+ ; Intersecting these ranges, we know the value returned is in [0, 8).
+ ; Therefore, InstCombine will transform
+ ; add %cttz, 1111 1111 1111 1000 ; decimal -8
+ ; to
+ ; or %cttz, 1111 1111 1111 1000
+ %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0
+ %b = add i16 %cttz, -8
+ ret i16 %b
+}
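+; For example, if %cttz is 5, then 5 + (-8) = -3 and 5 | 0xFFF8 is also -3;
+; values in [0, 8) share no set bits with -8, so the add never produces a
+; carry and is equivalent to the or.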
+declare i16 @llvm.cttz.i16(i16, i1)
+!0 = !{i16 0, i16 8}
+
+; Similar to @add_cttz, but in this test, the range implied by the
+; intrinsic is more strict. Therefore, ValueTracking uses that range.
+define i16 @add_cttz_2(i16 %a) {
+; CHECK-LABEL: @add_cttz_2(
+; CHECK-NEXT: [[CTTZ:%.*]] = call i16 @llvm.cttz.i16(i16 [[A:%.*]], i1 true), !range !1
+; CHECK-NEXT: [[B:%.*]] = or i16 [[CTTZ]], -16
+; CHECK-NEXT: ret i16 [[B]]
+;
+ ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+ ; is in [0, 16). The range metadata indicates the value returned is in
+ ; [0, 32). Intersecting these ranges, we know the value returned is in
+ ; [0, 16). Therefore, InstCombine will transform
+ ; add %cttz, 1111 1111 1111 0000 ; decimal -16
+ ; to
+ ; or %cttz, 1111 1111 1111 0000
+ %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1
+ %b = add i16 %cttz, -16
+ ret i16 %b
+}
+!1 = !{i16 0, i16 32}
+
+define i32 @add_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_or_and(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add i32 %or, %and
+ ret i32 %add
+}
+
+define i32 @add_or_and_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_or_and_commutative(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %y, %x ; swapped
+ %add = add i32 %or, %and
+ ret i32 %add
+}
+
+define i32 @add_and_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_and_or(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add i32 %and, %or
+ ret i32 %add
+}
+
+define i32 @add_and_or_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_and_or_commutative(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %y, %x ; swapped
+ %add = add i32 %and, %or
+ ret i32 %add
+}
+
+define i32 @add_nsw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nsw_or_and(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nsw i32 %or, %and
+ ret i32 %add
+}
+
+define i32 @add_nuw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nuw_or_and(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nuw i32 %or, %and
+ ret i32 %add
+}
+
+define i32 @add_nuw_nsw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nuw_nsw_or_and(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %or = or i32 %x, %y
+ %and = and i32 %x, %y
+ %add = add nsw nuw i32 %or, %and
+ ret i32 %add
+}
+
+; A *nsw B + A *nsw C != A *nsw (B + C)
+; e.g. A = -1, B = 1, C = INT_SMAX
+
+define i8 @add_of_mul(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @add_of_mul(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MB1:%.*]] = add i8 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[SUM:%.*]] = mul i8 [[MB1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[SUM]]
+;
+ entry:
+ %mA = mul nsw i8 %x, %y
+ %mB = mul nsw i8 %x, %z
+ %sum = add nsw i8 %mA, %mB
+ ret i8 %sum
+}
+
+define i32 @add_of_selects(i1 %A, i32 %B) {
+; CHECK-LABEL: @add_of_selects(
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %sel0 = select i1 %A, i32 0, i32 -2
+ %sel1 = select i1 %A, i32 %B, i32 2
+ %add = add i32 %sel0, %sel1
+ ret i32 %add
+}
diff --git a/llvm/test/Transforms/InstCombine/add3.ll b/llvm/test/Transforms/InstCombine/add3.ll
new file mode 100644
index 00000000000..9d3842f1a31
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add3.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | grep inttoptr | count 2
+
+;; Target triple for gep raising case below.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+; PR1795
+define void @test2(i32 %.val24) {
+EntryBlock:
+ add i32 %.val24, -12
+ inttoptr i32 %0 to i32*
+ store i32 1, i32* %1
+ add i32 %.val24, -16
+ inttoptr i32 %2 to i32*
+ getelementptr i32, i32* %3, i32 1
+ load i32, i32* %4
+ tail call i32 @callee( i32 %5 )
+ ret void
+}
+
+declare i32 @callee(i32)
diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll
new file mode 100644
index 00000000000..79f3fa08fda
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add4.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; ModuleID = 'test/Transforms/InstCombine/add4.ll'
+source_filename = "test/Transforms/InstCombine/add4.ll"
+
+define i64 @match_unsigned(i64 %x) {
+; CHECK-LABEL: @match_unsigned(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 19136
+; CHECK-NEXT: ret i64 [[UREM]]
+;
+bb:
+ %tmp = urem i64 %x, 299
+ %tmp1 = udiv i64 %x, 299
+ %tmp2 = urem i64 %tmp1, 64
+ %tmp3 = mul i64 %tmp2, 299
+ %tmp4 = add i64 %tmp, %tmp3
+ ret i64 %tmp4
+}
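+
+; (299 * 64 = 19136, so x%299 + ((x/299)%64)*299 is exactly x % 19136.)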
+
+define i64 @match_andAsRem_lshrAsDiv_shlAsMul(i64 %x) {
+; CHECK-LABEL: @match_andAsRem_lshrAsDiv_shlAsMul(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 576
+; CHECK-NEXT: ret i64 [[UREM]]
+;
+bb:
+ %tmp = and i64 %x, 63
+ %tmp1 = lshr i64 %x, 6
+ %tmp2 = urem i64 %tmp1, 9
+ %tmp3 = shl i64 %tmp2, 6
+ %tmp4 = add i64 %tmp, %tmp3
+ ret i64 %tmp4
+}
+
+define i64 @match_signed(i64 %x) {
+; CHECK-LABEL: @match_signed(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[SREM1:%.*]] = srem i64 [[X:%.*]], 172224
+; CHECK-NEXT: ret i64 [[SREM1]]
+;
+bb:
+ %tmp = srem i64 %x, 299
+ %tmp1 = sdiv i64 %x, 299
+ %tmp2 = srem i64 %tmp1, 64
+ %tmp3 = sdiv i64 %x, 19136
+ %tmp4 = srem i64 %tmp3, 9
+ %tmp5 = mul i64 %tmp2, 299
+ %tmp6 = add i64 %tmp, %tmp5
+ %tmp7 = mul i64 %tmp4, 19136
+ %tmp8 = add i64 %tmp6, %tmp7
+ ret i64 %tmp8
+}
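+
+; (Applying the same identity twice: 299 * 64 = 19136 and 19136 * 9 = 172224,
+; so the whole signed chain above folds to a single srem by 172224.)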
+
+define i64 @not_match_inconsistent_signs(i64 %x) {
+; CHECK-LABEL: @not_match_inconsistent_signs(
+; CHECK: [[TMP:%.*]] = add
+; CHECK-NEXT: ret i64 [[TMP]]
+;
+bb:
+ %tmp = urem i64 %x, 299
+ %tmp1 = sdiv i64 %x, 299
+ %tmp2 = urem i64 %tmp1, 64
+ %tmp3 = mul i64 %tmp2, 299
+ %tmp4 = add i64 %tmp, %tmp3
+ ret i64 %tmp4
+}
+
+define i64 @not_match_inconsistent_values(i64 %x) {
+; CHECK-LABEL: @not_match_inconsistent_values(
+; CHECK: [[TMP:%.*]] = add
+; CHECK-NEXT: ret i64 [[TMP]]
+;
+bb:
+ %tmp = urem i64 %x, 299
+ %tmp1 = udiv i64 %x, 29
+ %tmp2 = urem i64 %tmp1, 64
+ %tmp3 = mul i64 %tmp2, 299
+ %tmp4 = add i64 %tmp, %tmp3
+ ret i64 %tmp4
+}
+
+define i32 @not_match_overflow(i32 %x) {
+; CHECK-LABEL: @not_match_overflow(
+; CHECK: [[TMP:%.*]] = add
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+bb:
+ %tmp = urem i32 %x, 299
+ %tmp1 = udiv i32 %x, 299
+ %tmp2 = urem i32 %tmp1, 147483647
+ %tmp3 = mul i32 %tmp2, 299
+ %tmp4 = add i32 %tmp, %tmp3
+ ret i32 %tmp4
+}
diff --git a/llvm/test/Transforms/InstCombine/addnegneg.ll b/llvm/test/Transforms/InstCombine/addnegneg.ll
new file mode 100644
index 00000000000..90f6baf5dd5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/addnegneg.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | grep " sub " | count 1
+; PR2047
+
+define i32 @l(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %b.neg = sub i32 0, %b ; <i32> [#uses=1]
+ %c.neg = sub i32 0, %c ; <i32> [#uses=1]
+ %sub4 = add i32 %c.neg, %b.neg ; <i32> [#uses=1]
+ %sub6 = add i32 %sub4, %d ; <i32> [#uses=1]
+ ret i32 %sub6
+}
diff --git a/llvm/test/Transforms/InstCombine/addrspacecast.ll b/llvm/test/Transforms/InstCombine/addrspacecast.ll
new file mode 100644
index 00000000000..6caefb166db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/addrspacecast.ll
@@ -0,0 +1,186 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i1) nounwind
+
+
+define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+ %z = addrspacecast i32 addrspace(3)* %y to i32*
+ ret i32* %z
+}
+
+define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_vector(
+; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*>
+; CHECK-NEXT: ret
+ %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+ %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*>
+ ret <4 x i32*> %z
+}
+
+define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+ %z = addrspacecast i32 addrspace(3)* %y to float*
+ ret float* %z
+}
+
+define <4 x float*> @combine_redundant_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float*>
+; CHECK-NEXT: ret
+ %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+ %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x float*>
+ ret <4 x float*> %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_bitcast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_1(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+ %z = bitcast i32 addrspace(2)* %y to float addrspace(2)*
+ ret float addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_addrspacecast_bitcast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_2(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+ %z = bitcast float addrspace(2)* %y to i32 addrspace(2)*
+ ret i32 addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_bitcast_addrspacecast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_1(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+ %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+ %z = addrspacecast i8 addrspace(1)* %y to i32 addrspace(2)*
+ ret i32 addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_bitcast_addrspacecast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_2(
+; CHECK: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+ %z = addrspacecast i8 addrspace(1)* %y to float addrspace(2)*
+ ret float addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+ %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+ ret float addrspace(2)* %y
+}
+
+define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float addrspace(2)*>
+; CHECK-NEXT: ret
+ %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(2)*>
+ ret <4 x float addrspace(2)*> %y
+}
+
+define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) {
+; CHECK-LABEL: @canonicalize_addrspacecast(
+; CHECK-NEXT: getelementptr inbounds [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0
+; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32*
+; CHECK-NEXT: load i32, i32*
+; CHECK-NEXT: ret i32
+ %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32*
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+ i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+ i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+ i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+ i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ]
+
+declare void @foo(i8*) nounwind
+
+; A copy from a constant addrspacecast'ed global
+; CHECK-LABEL: @memcpy_addrspacecast(
+; CHECK-NOT: call void @llvm.memcpy
+define i32 @memcpy_addrspacecast() nounwind {
+entry:
+ %alloca = alloca i8, i32 48
+ call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca, i8 addrspace(1)* align 4 addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i1 false) nounwind
+ br label %loop.body
+
+loop.body:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
+ %ptr = getelementptr i8, i8* %alloca, i32 %i
+ %load = load i8, i8* %ptr
+ %ext = zext i8 %load to i32
+ %sum.inc = add i32 %sum, %ext
+ %i.inc = add i32 %i, 1
+ %cmp = icmp ne i32 %i, 48
+ br i1 %cmp, label %loop.body, label %end
+
+end:
+ ret i32 %sum.inc
+}
+
+; CHECK-LABEL: @constant_fold_null(
+; CHECK: i32 addrspace(3)* null to i32 addrspace(4)*
+define void @constant_fold_null() #0 {
+ %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
+
+; CHECK-LABEL: @constant_fold_undef(
+; CHECK: ret i32 addrspace(4)* undef
+define i32 addrspace(4)* @constant_fold_undef() #0 {
+ %cast = addrspacecast i32 addrspace(3)* undef to i32 addrspace(4)*
+ ret i32 addrspace(4)* %cast
+}
+
+; CHECK-LABEL: @constant_fold_null_vector(
+; CHECK: addrspacecast (<4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>)
+define <4 x i32 addrspace(4)*> @constant_fold_null_vector() #0 {
+ %cast = addrspacecast <4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>
+ ret <4 x i32 addrspace(4)*> %cast
+}
+
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_inttoptr() #0 {
+ %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
+
+; CHECK-LABEL: @constant_fold_gep_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i64 1274 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_gep_inttoptr() #0 {
+ %k = inttoptr i32 1234 to i32 addrspace(3)*
+ %gep = getelementptr i32, i32 addrspace(3)* %k, i32 10
+ %cast = addrspacecast i32 addrspace(3)* %gep to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll
new file mode 100644
index 00000000000..5b5ba21c443
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll
@@ -0,0 +1,486 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Instcombine should recognize that this code can be adjusted to fit the canonical max/min pattern.
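+; For example, "icmp sge %n, 1" with a select arm of 0 selects the same value
+; as "icmp sgt %n, 0", so the predicate and constant can be rewritten into the
+; canonical smax form (see @smax2 below).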
+
+; No change
+
+define i32 @smax1(i32 %n) {
+; CHECK-LABEL: @smax1(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sgt i32 %n, 0
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; No change
+
+define i32 @smin1(i32 %n) {
+; CHECK-LABEL: @smin1(
+; CHECK-NEXT: [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp slt i32 %n, 0
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax2(i32 %n) {
+; CHECK-LABEL: @smax2(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sge i32 %n, 1
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin2(i32 %n) {
+; CHECK-LABEL: @smin2(
+; CHECK-NEXT: [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sle i32 %n, -1
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax3(i32 %n) {
+; CHECK-LABEL: @smax3(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sgt i32 %n, -1
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smax3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smax3_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp sgt <2 x i32> %n, <i32 -1, i32 -1>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin3(i32 %n) {
+; CHECK-LABEL: @smin3(
+; CHECK-NEXT: [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp slt i32 %n, 1
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smin3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smin3_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp slt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp slt <2 x i32> %n, <i32 1, i32 1>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umax3(i32 %n) {
+; CHECK-LABEL: @umax3(
+; CHECK-NEXT: [[T:%.*]] = icmp ugt i32 %n, 5
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 5
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp ugt i32 %n, 4
+ %m = select i1 %t, i32 %n, i32 5
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umax3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umax3_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp ugt <2 x i32> %n, <i32 5, i32 5>
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp ugt <2 x i32> %n, <i32 4, i32 4>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 5, i32 5>
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umin3(i32 %n) {
+; CHECK-LABEL: @umin3(
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 %n, 6
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 6
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp ult i32 %n, 7
+ %m = select i1 %t, i32 %n, i32 6
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umin3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umin3_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp ult <2 x i32> %n, <i32 6, i32 6>
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 6, i32 6>
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp ult <2 x i32> %n, <i32 7, i32 7>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 6, i32 6>
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax4(i32 %n) {
+; CHECK-LABEL: @smax4(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sge i32 %n, 0
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smax4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smax4_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp sgt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp sge <2 x i32> %n, zeroinitializer
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin4(i32 %n) {
+; CHECK-LABEL: @smin4(
+; CHECK-NEXT: [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp sle i32 %n, 0
+ %m = select i1 %t, i32 %n, i32 0
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smin4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smin4_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp slt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp sle <2 x i32> %n, zeroinitializer
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umax4(i32 %n) {
+; CHECK-LABEL: @umax4(
+; CHECK-NEXT: [[T:%.*]] = icmp ugt i32 %n, 8
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 8
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp uge i32 %n, 8
+ %m = select i1 %t, i32 %n, i32 8
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umax4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umax4_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp ugt <2 x i32> %n, <i32 8, i32 8>
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 8, i32 8>
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp uge <2 x i32> %n, <i32 8, i32 8>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 8, i32 8>
+ ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umin4(i32 %n) {
+; CHECK-LABEL: @umin4(
+; CHECK-NEXT: [[T:%.*]] = icmp ult i32 %n, 9
+; CHECK-NEXT: [[M:%.*]] = select i1 [[T]], i32 %n, i32 9
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %t = icmp ule i32 %n, 9
+ %m = select i1 %t, i32 %n, i32 9
+ ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umin4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umin4_vec(
+; CHECK-NEXT: [[T:%.*]] = icmp ult <2 x i32> %n, <i32 9, i32 9>
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 9, i32 9>
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %t = icmp ule <2 x i32> %n, <i32 9, i32 9>
+ %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 9, i32 9>
+ ret <2 x i32> %m
+}
+
+define i64 @smax_sext(i32 %a) {
+; CHECK-LABEL: @smax_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[A_EXT]], 0
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 0
+; CHECK-NEXT: ret i64 [[MAX]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp sgt i32 %a, -1
+ %max = select i1 %cmp, i64 %a_ext, i64 0
+ ret i64 %max
+}
+
+define <2 x i64> @smax_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @smax_sext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i64> [[A_EXT]], zeroinitializer
+; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> [[MAX]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+ %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> zeroinitializer
+ ret <2 x i64> %max
+}
+
+define i64 @smin_sext(i32 %a) {
+; CHECK-LABEL: @smin_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[A_EXT]], 0
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 0
+; CHECK-NEXT: ret i64 [[MIN]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp slt i32 %a, 1
+ %min = select i1 %cmp, i64 %a_ext, i64 0
+ ret i64 %min
+}
+
+define <2 x i64>@smin_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @smin_sext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i64> [[A_EXT]], zeroinitializer
+; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> [[MIN]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp slt <2 x i32> %a, <i32 1, i32 1>
+ %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> zeroinitializer
+ ret <2 x i64> %min
+}
+
+define i64 @umax_sext(i32 %a) {
+; CHECK-LABEL: @umax_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 3
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT: ret i64 [[MAX]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp ugt i32 %a, 2
+ %max = select i1 %cmp, i64 %a_ext, i64 3
+ ret i64 %max
+}
+
+define <2 x i64> @umax_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_sext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT: ret <2 x i64> [[MAX]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+ %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 3, i64 3>
+ ret <2 x i64> %max
+}
+
+define i64 @umin_sext(i32 %a) {
+; CHECK-LABEL: @umin_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 2
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT: ret i64 [[MIN]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp ult i32 %a, 3
+ %min = select i1 %cmp, i64 %a_ext, i64 2
+ ret i64 %min
+}
+
+define <2 x i64> @umin_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_sext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT: ret <2 x i64> [[MIN]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+ %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 2, i64 2>
+ ret <2 x i64> %min
+}
+
+define i64 @umax_sext2(i32 %a) {
+; CHECK-LABEL: @umax_sext2(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 2
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT: ret i64 [[MIN]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp ult i32 %a, 3
+ %min = select i1 %cmp, i64 2, i64 %a_ext
+ ret i64 %min
+}
+
+define <2 x i64> @umax_sext2_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_sext2_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT: ret <2 x i64> [[MIN]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+ %min = select <2 x i1> %cmp, <2 x i64> <i64 2, i64 2>, <2 x i64> %a_ext
+ ret <2 x i64> %min
+}
+
+define i64 @umin_sext2(i32 %a) {
+; CHECK-LABEL: @umin_sext2(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 3
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT: ret i64 [[MIN]]
+;
+ %a_ext = sext i32 %a to i64
+ %cmp = icmp ugt i32 %a, 2
+ %min = select i1 %cmp, i64 3, i64 %a_ext
+ ret i64 %min
+}
+
+define <2 x i64> @umin_sext2_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_sext2_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT: ret <2 x i64> [[MIN]]
+;
+ %a_ext = sext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+ %min = select <2 x i1> %cmp, <2 x i64> <i64 3, i64 3>, <2 x i64> %a_ext
+ ret <2 x i64> %min
+}
+
+define i64 @umax_zext(i32 %a) {
+; CHECK-LABEL: @umax_zext(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 3
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT: ret i64 [[MAX]]
+;
+ %a_ext = zext i32 %a to i64
+ %cmp = icmp ugt i32 %a, 2
+ %max = select i1 %cmp, i64 %a_ext, i64 3
+ ret i64 %max
+}
+
+define <2 x i64> @umax_zext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_zext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT: ret <2 x i64> [[MAX]]
+;
+ %a_ext = zext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+ %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 3, i64 3>
+ ret <2 x i64> %max
+}
+
+define i64 @umin_zext(i32 %a) {
+; CHECK-LABEL: @umin_zext(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext i32 %a to i64
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 2
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT: ret i64 [[MIN]]
+;
+ %a_ext = zext i32 %a to i64
+ %cmp = icmp ult i32 %a, 3
+ %min = select i1 %cmp, i64 %a_ext, i64 2
+ ret i64 %min
+}
+
+define <2 x i64> @umin_zext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_zext_vec(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT: [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT: ret <2 x i64> [[MIN]]
+;
+ %a_ext = zext <2 x i32> %a to <2 x i64>
+ %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+ %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 2, i64 2>
+ ret <2 x i64> %min
+}
+
+; Don't crash mishandling a pattern that can't be transformed.
+
+define <2 x i16> @scalar_select_of_vectors(<2 x i16> %a, <2 x i16> %b, i8 %x) {
+; CHECK-LABEL: @scalar_select_of_vectors(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], <2 x i16> %a, <2 x i16> %b
+; CHECK-NEXT: ret <2 x i16> [[SEL]]
+;
+ %cmp = icmp slt i8 %x, 0
+ %sel = select i1 %cmp, <2 x i16> %a, <2 x i16> %b
+ ret <2 x i16> %sel
+}
+
diff --git a/llvm/test/Transforms/InstCombine/alias-recursion.ll b/llvm/test/Transforms/InstCombine/alias-recursion.ll
new file mode 100644
index 00000000000..efc1899e1f4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/alias-recursion.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.A = type { i32 (...)** }
+
+@0 = constant [1 x i8*] zeroinitializer
+
+@vtbl = alias i8*, getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
+
+define i32 (%class.A*)* @test() {
+; CHECK-LABEL: test
+entry:
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ %A = phi i32 (%class.A*)** [ bitcast (i8** @vtbl to i32 (%class.A*)**), %for.body ], [ null, %entry ]
+ %B = load i32 (%class.A*)*, i32 (%class.A*)** %A
+ ret i32 (%class.A*)* %B
+}
diff --git a/llvm/test/Transforms/InstCombine/align-2d-gep.ll b/llvm/test/Transforms/InstCombine/align-2d-gep.ll
new file mode 100644
index 00000000000..bbdb3f94cb2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/align-2d-gep.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -instcombine -S | grep "align 16" | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; A multi-dimensional array in a nested loop doing vector stores that
+; aren't yet aligned. Instcombine can understand the addressing in the
+; Nice case to prove 16 byte alignment. In the Awkward case, the inner
+; array dimension is not even, so the stores to it won't always be
+; aligned. Instcombine should prove alignment in exactly one of the two
+; stores.
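+; (One @Nice row is 20000 * 8 = 160000 bytes, a multiple of 16, so even-index
+; stores are 16-byte aligned in every row; one @Awkward row is 20001 * 8 =
+; 160008 bytes, so the alignment of a row's start alternates between rows.)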
+
+@Nice = global [1001 x [20000 x double]] zeroinitializer, align 32
+@Awkward = global [1001 x [20001 x double]] zeroinitializer, align 32
+
+define void @foo() nounwind {
+entry:
+ br label %bb7.outer
+
+bb7.outer:
+ %i = phi i64 [ 0, %entry ], [ %indvar.next26, %bb11 ]
+ br label %bb1
+
+bb1:
+ %j = phi i64 [ 0, %bb7.outer ], [ %indvar.next, %bb1 ]
+
+ %t4 = getelementptr [1001 x [20000 x double]], [1001 x [20000 x double]]* @Nice, i64 0, i64 %i, i64 %j
+ %q = bitcast double* %t4 to <2 x double>*
+ store <2 x double><double 0.0, double 0.0>, <2 x double>* %q, align 8
+
+ %s4 = getelementptr [1001 x [20001 x double]], [1001 x [20001 x double]]* @Awkward, i64 0, i64 %i, i64 %j
+ %r = bitcast double* %s4 to <2 x double>*
+ store <2 x double><double 0.0, double 0.0>, <2 x double>* %r, align 8
+
+ %indvar.next = add i64 %j, 2
+ %exitcond = icmp eq i64 %indvar.next, 556
+ br i1 %exitcond, label %bb11, label %bb1
+
+bb11:
+ %indvar.next26 = add i64 %i, 1
+ %exitcond27 = icmp eq i64 %indvar.next26, 991
+ br i1 %exitcond27, label %return.split, label %bb7.outer
+
+return.split:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
new file mode 100644
index 00000000000..d92daddd761
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Instcombine should be able to prove vector alignment in the
+; presence of a few mild address computation tricks.
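+; (In @test0 the base pointer is masked with -16 and the element offset %h is
+; always even, so each <2 x double> store address is a multiple of 16 bytes.)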
+
+; CHECK-LABEL: @test0(
+; CHECK: align 16
+
+define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind {
+entry:
+ %c = ptrtoint i8* %b to i64
+ %d = and i64 %c, -16
+ %e = inttoptr i64 %d to double*
+ %v = mul i64 %u, 2
+ %z = and i64 %y, -2
+ %t1421 = icmp eq i64 %n, 0
+ br i1 %t1421, label %return, label %bb
+
+bb:
+ %i = phi i64 [ %indvar.next, %bb ], [ 20, %entry ]
+ %j = mul i64 %i, %v
+ %h = add i64 %j, %z
+ %t8 = getelementptr double, double* %e, i64 %h
+ %p = bitcast double* %t8 to <2 x double>*
+ store <2 x double><double 0.0, double 0.0>, <2 x double>* %p, align 8
+ %indvar.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %indvar.next, %n
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
+
+; When we see an unaligned load from an insufficiently aligned global or
+; alloca, increase the alignment of the load, turning it into an aligned load.
+
+; CHECK-LABEL: @test1(
+; CHECK: tmp = load
+; CHECK: GLOBAL{{.*}}align 16
+
+@GLOBAL = internal global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1(<2 x i64> %x) {
+entry:
+ %tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
+ ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1{{.*}}align 16
+ %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+ ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1_gep(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1_gep{{.*}}align 16
+ %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32], [8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+ ret <16 x i8> %tmp
+}
+
+
+; When a load or store lacks an explicit alignment, add one.
+
+; CHECK-LABEL: @test2(
+; CHECK: load double, double* %p, align 8
+; CHECK: store double %n, double* %p, align 8
+
+define double @test2(double* %p, double %n) nounwind {
+ %t = load double, double* %p
+ store double %n, double* %p
+ ret double %t
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+declare void @use(i8*)
+
+%struct.s = type { i32, i32, i32, i32 }
+
+define void @test3(%struct.s* sret %a4) {
+; Check that the alignment is bumped up to the alignment of the sret type.
+; CHECK-LABEL: @test3(
+ %a4.cast = bitcast %struct.s* %a4 to i8*
+ call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %a4.cast, i8 0, i64 16, i1 false)
+ call void @use(i8* %a4.cast)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/align-attr.ll b/llvm/test/Transforms/InstCombine/align-attr.ll
new file mode 100644
index 00000000000..75a3766b7d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/align-attr.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* align 32 %a) #0 {
+entry:
+ %0 = load i32, i32* %a, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* align 32 %a) #0 {
+entry:
+ %v = call i32* @func1(i32* %a)
+ %0 = load i32, i32* %v, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32, i32* %v, align 32
+; CHECK: ret i32
+}
+
+declare i32* @func1(i32* returned) nounwind
+
diff --git a/llvm/test/Transforms/InstCombine/align-external.ll b/llvm/test/Transforms/InstCombine/align-external.ll
new file mode 100644
index 00000000000..15f3096105b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/align-external.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Don't assume that external global variables or those with weak linkage have
+; their preferred alignment. They may only have the ABI minimum alignment.
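+; (With the "i32:8:32" datalayout below, i32 has an ABI alignment of only
+; 1 byte but a preferred alignment of 4 bytes.)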
+
+target datalayout = "i32:8:32"
+
+@A = external global i32
+@B = weak_odr global i32 0
+
+@C = available_externally global <4 x i32> zeroinitializer, align 4
+; CHECK: @C = available_externally global <4 x i32> zeroinitializer, align 4
+
+define i64 @foo(i64 %a) {
+ %t = ptrtoint i32* @A to i64
+ %s = shl i64 %a, 3
+ %r = or i64 %t, %s
+ %q = add i64 %r, 1
+ ret i64 %q
+}
+
+; CHECK-LABEL: define i64 @foo(i64 %a)
+; CHECK: %s = shl i64 %a, 3
+; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
+; CHECK: %q = add i64 %r, 1
+; CHECK: ret i64 %q
+
+define i32 @bar() {
+ %r = load i32, i32* @B, align 1
+ ret i32 %r
+}
+
+; CHECK-LABEL: @bar()
+; CHECK: align 1
+
+define void @vec_store() {
+ store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4
+ ret void
+}
+; CHECK: define void @vec_store()
+; CHECK: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4
diff --git a/llvm/test/Transforms/InstCombine/all-bits-shift.ll b/llvm/test/Transforms/InstCombine/all-bits-shift.ll
new file mode 100644
index 00000000000..a035f53d1aa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/all-bits-shift.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -instcombine -expensive-combines < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+ %0 = load i32*, i32** @b, align 8
+ %1 = load i32, i32* @a, align 4
+ %lnot = icmp eq i32 %1, 0
+ %lnot.ext = zext i1 %lnot to i32
+ %shr.i = lshr i32 2072, %lnot.ext
+ %call.lobit = lshr i32 %shr.i, 7
+ %2 = and i32 %call.lobit, 1
+ %3 = load i32, i32* %0, align 4
+ %or = or i32 %2, %3
+ store i32 %or, i32* %0, align 4
+ %4 = load i32, i32* @a, align 4
+ %lnot.1 = icmp eq i32 %4, 0
+ %lnot.ext.1 = zext i1 %lnot.1 to i32
+ %shr.i.1 = lshr i32 2072, %lnot.ext.1
+ %call.lobit.1 = lshr i32 %shr.i.1, 7
+ %5 = and i32 %call.lobit.1, 1
+ %or.1 = or i32 %5, %or
+ store i32 %or.1, i32* %0, align 4
+ ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+; ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
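+; (2072 is 0b100000011000; after the combined shift the result's low bit comes
+; from bit 7 or bit 8 of 2072, and both of those bits are clear.)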
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/alloca-big.ll b/llvm/test/Transforms/InstCombine/alloca-big.ll
new file mode 100644
index 00000000000..bff5fcfe4e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/alloca-big.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; OSS-Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5223
+define void @test_bigalloc() {
+; CHECK-LABEL: @test_bigalloc(
+; CHECK-NEXT: [[TMP1:%.*]] = alloca [18446744069414584320 x i8], align 1
+; CHECK-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [18446744069414584320 x i8], [18446744069414584320 x i8]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT: store i8* [[DOTSUB]], i8** undef, align 8
+; CHECK-NEXT: ret void
+;
+ %1 = alloca i8, i864 -4294967296
+ %2 = getelementptr i8, i8* %1, i1 undef
+ store i8* %2, i8** undef
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
new file mode 100644
index 00000000000..c3bca700ea7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -S -instcombine -instcombine-lower-dbg-declare=0 | FileCheck %s
+
+; In this example, instcombine wants to turn "local" into an i64, since that's
+; how it is stored. It should keep the debug info referring to the alloca when
+; it does the replacement.
+
+; C source:
+; struct Foo {
+; int x, y;
+; };
+; void escape(const void*);
+; void f(struct Foo *p) {
+; struct Foo local;
+; *(__int64 *)&local = *(__int64 *)p;
+; escape(&local);
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "t.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.25508"
+
+%struct.Foo = type { i32, i32 }
+
+define void @f(%struct.Foo* %p) !dbg !11 {
+entry:
+ %local = alloca %struct.Foo, align 4
+ %0 = bitcast %struct.Foo* %local to i8*, !dbg !24
+ call void @llvm.dbg.declare(metadata %struct.Foo* %local, metadata !22, metadata !DIExpression()), !dbg !25
+ %1 = bitcast %struct.Foo* %p to i64*, !dbg !26
+ %2 = load i64, i64* %1, align 8, !dbg !26, !tbaa !27
+ %3 = bitcast %struct.Foo* %local to i64*, !dbg !31
+ store i64 %2, i64* %3, align 4, !dbg !32, !tbaa !27
+ %4 = bitcast %struct.Foo* %local to i8*, !dbg !33
+ call void @escape(i8* %4), !dbg !34
+ %5 = bitcast %struct.Foo* %local to i8*, !dbg !35
+ ret void, !dbg !35
+}
+
+; CHECK-LABEL: define void @f(%struct.Foo* %p)
+; CHECK: %local = alloca i64, align 8
+; CHECK: call void @llvm.dbg.declare(metadata i64* %local, metadata !22, metadata !DIExpression())
+; CHECK: [[simplified:%.*]] = bitcast i64* %local to i8*
+;
+; Another dbg.value for "local" would be redundant here.
+; CHECK-NOT: call void @llvm.dbg.value(metadata i8* [[simplified]], metadata !22, metadata !DIExpression())
+;
+; CHECK: call void @escape(i8* nonnull [[simplified]])
+; CHECK: ret void
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare void @escape(i8*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "t.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild", checksumkind: CSK_MD5, checksum: "d7473625866433067a75fd7d03d2abf7")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+!5 = !DIBasicType(name: "long long int", size: 64, encoding: DW_ATE_signed)
+!6 = !{i32 2, !"CodeView", i32 1}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 2}
+!9 = !{i32 7, !"PIC Level", i32 2}
+!10 = !{!"clang version 6.0.0 "}
+!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !14}
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !1, line: 1, size: 64, elements: !16)
+!16 = !{!17, !19}
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !15, file: !1, line: 2, baseType: !18, size: 32)
+!18 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!19 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !15, file: !1, line: 2, baseType: !18, size: 32, offset: 32)
+!20 = !{!21, !22}
+!21 = !DILocalVariable(name: "p", arg: 1, scope: !11, file: !1, line: 5, type: !14)
+!22 = !DILocalVariable(name: "local", scope: !11, file: !1, line: 6, type: !15)
+!23 = !DILocation(line: 5, column: 20, scope: !11)
+!24 = !DILocation(line: 6, column: 3, scope: !11)
+!25 = !DILocation(line: 6, column: 14, scope: !11)
+!26 = !DILocation(line: 7, column: 24, scope: !11)
+!27 = !{!28, !28, i64 0}
+!28 = !{!"long long", !29, i64 0}
+!29 = !{!"omnipotent char", !30, i64 0}
+!30 = !{!"Simple C/C++ TBAA"}
+!31 = !DILocation(line: 7, column: 3, scope: !11)
+!32 = !DILocation(line: 7, column: 22, scope: !11)
+!33 = !DILocation(line: 8, column: 10, scope: !11)
+!34 = !DILocation(line: 8, column: 3, scope: !11)
+!35 = !DILocation(line: 9, column: 1, scope: !11)
diff --git a/llvm/test/Transforms/InstCombine/alloca.ll b/llvm/test/Transforms/InstCombine/alloca.ll
new file mode 100644
index 00000000000..c1ec9b3d00e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/alloca.ll
@@ -0,0 +1,179 @@
+; RUN: opt < %s -instcombine -S -data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=CHECK -check-prefix=ALL
+; RUN: opt < %s -instcombine -S -data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32 -check-prefix=ALL
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL -check-prefix=ALL
+
+
+declare void @use(...)
+
+@int = global i32 zeroinitializer
+
+; Zero byte allocas should be merged if they can't be deleted.
+; CHECK-LABEL: @test(
+; CHECK: alloca
+; CHECK-NOT: alloca
+define void @test() {
+ %X = alloca [0 x i32] ; <[0 x i32]*> [#uses=1]
+ call void (...) @use( [0 x i32]* %X )
+ %Y = alloca i32, i32 0 ; <i32*> [#uses=1]
+ call void (...) @use( i32* %Y )
+ %Z = alloca { } ; <{ }*> [#uses=1]
+ call void (...) @use( { }* %Z )
+ %size = load i32, i32* @int
+ %A = alloca {{}}, i32 %size
+ call void (...) @use( {{}}* %A )
+ ret void
+}
+
+; Dead allocas that are only stored to should be deleted.
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+define void @test2() {
+ %A = alloca i32 ; <i32*> [#uses=1]
+ store i32 123, i32* %A
+ ret void
+}
+
+; Dead allocas that are only stored to should be deleted.
+; CHECK-LABEL: @test3(
+; CHECK-NOT: alloca
+define void @test3() {
+ %A = alloca { i32 } ; <{ i32 }*> [#uses=1]
+ %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 123, i32* %B
+ ret void
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: = zext i32 %n to i64
+; CHECK: %A = alloca i32, i64 %
+define i32* @test4(i32 %n) {
+ %A = alloca i32, i32 %n
+ ret i32* %A
+}
+
+; Allocas which are only used by GEPs, bitcasts, addrspacecasts, and stores
+; (transitively) should be deleted.
+define void @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: ret
+
+entry:
+ %a = alloca { i32 }
+ %b = alloca i32*
+ %c = alloca i32
+ %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+ store i32 123, i32* %a.1
+ store i32* %a.1, i32** %b
+ %b.1 = bitcast i32** %b to i32*
+ store i32 123, i32* %b.1
+ %a.2 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+ store atomic i32 2, i32* %a.2 unordered, align 4
+ %a.3 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+ store atomic i32 3, i32* %a.3 release, align 4
+ %a.4 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+ store atomic i32 4, i32* %a.4 seq_cst, align 4
+ %c.1 = addrspacecast i32* %c to i32 addrspace(1)*
+ store i32 123, i32 addrspace(1)* %c.1
+ ret void
+}
+
+declare void @f(i32* %p)
+
+; Check that we don't delete allocas in cases where removing them would be incorrect.
+define void @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NOT: ret
+; CHECK: alloca
+; CHECK-NEXT: alloca
+; CHECK: ret
+
+entry:
+ %a = alloca { i32 }
+ %b = alloca i32
+ %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+ store volatile i32 123, i32* %a.1
+ tail call void @f(i32* %b)
+ ret void
+}
+
+; PR14371
+%opaque_type = type opaque
+%real_type = type { { i32, i32* } }
+
+@opaque_global = external constant %opaque_type, align 4
+
+define void @test7() {
+entry:
+ %0 = alloca %real_type, align 4
+ %1 = bitcast %real_type* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+
+; Check that the GEP indices use the pointer size, or 64 if unknown
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: alloca [100 x i32]
+; CHECK: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i64 0, i64 0
+
+; P32-LABEL: @test8(
+; P32: alloca [100 x i32]
+; P32: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i32 0, i32 0
+
+; NODL-LABEL: @test8(
+; NODL: alloca [100 x i32]
+; NODL: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i64 0, i64 0
+ %x = alloca i32, i32 100
+ call void (...) @use(i32* %x)
+ ret void
+}
+
+; PR19569
+%struct_type = type { i32, i32 }
+declare void @test9_aux(<{ %struct_type }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test9(%struct_type* %a) {
+; CHECK-LABEL: @test9(
+entry:
+ %inalloca.save = call i8* @llvm.stacksave()
+ %argmem = alloca inalloca <{ %struct_type }>
+; CHECK: alloca inalloca i64, align 8
+ %0 = getelementptr inbounds <{ %struct_type }>, <{ %struct_type }>* %argmem, i32 0, i32 0
+ %1 = bitcast %struct_type* %0 to i8*
+ %2 = bitcast %struct_type* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 8, i1 false)
+ call void @test9_aux(<{ %struct_type }>* inalloca %argmem)
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret void
+}
+
+define void @test10() {
+entry:
+; ALL-LABEL: @test10(
+; ALL: %v32 = alloca i1, align 8
+; ALL: %v64 = alloca i1, align 8
+; ALL: %v33 = alloca i1, align 8
+ %v32 = alloca i1, align 8
+ %v64 = alloca i1, i64 1, align 8
+ %v33 = alloca i1, i33 1, align 8
+ call void (...) @use(i1* %v32, i1* %v64, i1* %v33)
+ ret void
+}
+
+define void @test11() {
+entry:
+; ALL-LABEL: @test11(
+; ALL: %y = alloca i32
+; ALL: call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+; ALL: ret void
+ %y = alloca i32
+ call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/allocsize-32.ll b/llvm/test/Transforms/InstCombine/allocsize-32.ll
new file mode 100644
index 00000000000..a732f64e43d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/allocsize-32.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; The idea is that we want to have sane semantics (e.g. not assertion failures)
+; when given an allocsize function that takes a 64-bit argument in the face of
+; 32-bit pointers.
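+; A size of 5,000,000,000 cannot be represented in the i32 returned by
+; @llvm.objectsize.i32, so that call is expected to be left unfolded below.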
+
+target datalayout="e-p:32:32:32"
+
+declare i8* @my_malloc(i8*, i64) allocsize(1)
+
+define void @test_malloc(i8** %p, i32* %r) {
+ %1 = call i8* @my_malloc(i8* null, i64 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false)
+ ; CHECK: store i32 100
+ store i32 %2, i32* %r, align 8
+
+ ; Big number is 5 billion.
+ %3 = call i8* @my_malloc(i8* null, i64 5000000000)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: call i32 @llvm.objectsize
+ %4 = call i32 @llvm.objectsize.i32.p0i8(i8* %3, i1 false)
+ store i32 %4, i32* %r, align 8
+ ret void
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
diff --git a/llvm/test/Transforms/InstCombine/allocsize.ll b/llvm/test/Transforms/InstCombine/allocsize.ll
new file mode 100644
index 00000000000..ac1817c164f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/allocsize.ll
@@ -0,0 +1,154 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; Test that instcombine folds allocsize function calls properly.
+; Dummy arguments are inserted to verify that allocsize is picking the right
+; args, and to prove that arbitrary unfoldable values don't interfere with
+; allocsize if they're not used by allocsize.
+
+declare i8* @my_malloc(i8*, i32) allocsize(1)
+declare i8* @my_calloc(i8*, i8*, i32, i32) allocsize(2, 3)
+
+; CHECK-LABEL: define void @test_malloc
+define void @test_malloc(i8** %p, i64* %r) {
+ %1 = call i8* @my_malloc(i8* null, i32 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 100
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; CHECK-LABEL: define void @test_calloc
+define void @test_calloc(i8** %p, i64* %r) {
+ %1 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 5)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 500
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; Failure cases with non-constant values...
+; CHECK-LABEL: define void @test_malloc_fails
+define void @test_malloc_fails(i8** %p, i64* %r, i32 %n) {
+ %1 = call i8* @my_malloc(i8* null, i32 %n)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; CHECK-LABEL: define void @test_calloc_fails
+define void @test_calloc_fails(i8** %p, i64* %r, i32 %n) {
+ %1 = call i8* @my_calloc(i8* null, i8* null, i32 %n, i32 5)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ store i64 %2, i64* %r, align 8
+
+
+ %3 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 %n)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+ store i64 %4, i64* %r, align 8
+ ret void
+}
+
+declare i8* @my_malloc_outofline(i8*, i32) #0
+declare i8* @my_calloc_outofline(i8*, i8*, i32, i32) #1
+
+; Verify that an allocsize attribute specified out of line (via the #0/#1
+; attribute groups below) is parsed correctly.
+; CHECK-LABEL: define void @test_outofline
+define void @test_outofline(i8** %p, i64* %r) {
+ %1 = call i8* @my_malloc_outofline(i8* null, i32 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 100
+ store i64 %2, i64* %r, align 8
+
+
+ %3 = call i8* @my_calloc_outofline(i8* null, i8* null, i32 100, i32 5)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+ ; CHECK: store i64 500
+ store i64 %4, i64* %r, align 8
+ ret void
+}
+
+declare i8* @my_malloc_i64(i8*, i64) #0
+declare i8* @my_tiny_calloc(i8*, i8*, i8, i8) #1
+declare i8* @my_varied_calloc(i8*, i8*, i32, i8) #1
+
+; CHECK-LABEL: define void @test_overflow
+define void @test_overflow(i8** %p, i32* %r) {
+ %r64 = bitcast i32* %r to i64*
+
+  ; (2**31 + 1) * 2 == 2**32 + 2, which does not fit in 32 bits. So overflow.
+ %big_malloc = call i8* @my_calloc(i8* null, i8* null, i32 2147483649, i32 2)
+ store i8* %big_malloc, i8** %p, align 8
+
+ ; CHECK: @llvm.objectsize
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc, i1 false)
+ store i32 %1, i32* %r, align 4
+
+
+ %big_little_malloc = call i8* @my_tiny_calloc(i8* null, i8* null, i8 127, i8 4)
+ store i8* %big_little_malloc, i8** %p, align 8
+
+ ; CHECK: store i32 508
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_little_malloc, i1 false)
+ store i32 %2, i32* %r, align 4
+
+
+ ; malloc(2**33)
+ %big_malloc_i64 = call i8* @my_malloc_i64(i8* null, i64 8589934592)
+ store i8* %big_malloc_i64, i8** %p, align 8
+
+ ; CHECK: @llvm.objectsize
+ %3 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc_i64, i1 false)
+ store i32 %3, i32* %r, align 4
+
+
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %big_malloc_i64, i1 false)
+ ; CHECK: store i64 8589934592
+ store i64 %4, i64* %r64, align 8
+
+
+ ; Just intended to ensure that we properly handle args of different types...
+ %varied_calloc = call i8* @my_varied_calloc(i8* null, i8* null, i32 1000, i8 5)
+ store i8* %varied_calloc, i8** %p, align 8
+
+ ; CHECK: store i32 5000
+ %5 = call i32 @llvm.objectsize.i32.p0i8(i8* %varied_calloc, i1 false)
+ store i32 %5, i32* %r, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: define void @test_nobuiltin
+; We had a bug where `nobuiltin` would cause `allocsize` to be ignored in
+; @llvm.objectsize calculations.
+define void @test_nobuiltin(i8** %p, i64* %r) {
+ %1 = call i8* @my_malloc(i8* null, i32 100) nobuiltin
+ store i8* %1, i8** %p, align 8
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 100
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+attributes #0 = { allocsize(1) }
+attributes #1 = { allocsize(2, 3) }
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
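The allocsize(2, 3) cases above reduce to: size = arg2 * arg3, folded only when the
product fits in the objectsize result width. A minimal Python sketch (not part of the
patch; the helper name is made up):

    def fold_calloc_objectsize(nelems, elem_size, result_bits):
        size = nelems * elem_size
        return size if size < 2 ** result_bits else None

    assert fold_calloc_objectsize(100, 5, 64) == 500           # @test_calloc
    assert fold_calloc_objectsize(2147483649, 2, 32) is None   # @test_overflow, i32
    assert fold_calloc_objectsize(127, 4, 32) == 508           # @my_tiny_calloc case
    assert fold_calloc_objectsize(1000, 5, 32) == 5000         # @my_varied_calloc case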
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
new file mode 100644
index 00000000000..d4aa1c5488b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Should be optimized to one and.
+define i1 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 65280
+; CHECK-NEXT: [[TMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %tmp1 = and i32 %a, 65280
+ %tmp3 = and i32 %b, 65280
+ %tmp = icmp ne i32 %tmp1, %tmp3
+ ret i1 %tmp
+}
+
+define <2 x i1> @test1vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 65280, i32 65280>
+; CHECK-NEXT: [[TMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP]]
+;
+ %tmp1 = and <2 x i32> %a, <i32 65280, i32 65280>
+ %tmp3 = and <2 x i32> %b, <i32 65280, i32 65280>
+ %tmp = icmp ne <2 x i32> %tmp1, %tmp3
+ ret <2 x i1> %tmp
+}
+
+define i1 @test2(i64 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %A to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i64 %A, 128
+ %cmp = icmp eq i64 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test2vec(<2 x i64> %A) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i64> %A, <i64 128, i64 128>
+ %cmp = icmp eq <2 x i64> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test3(i64 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %A to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i64 %A, 128
+ %cmp = icmp ne i64 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test3vec(<2 x i64> %A) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i64> %A, <i64 128, i64 128>
+ %cmp = icmp ne <2 x i64> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
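Both folds checked above are easy to confirm by brute force: masked inequality is the
same as a nonzero masked xor, and testing bit 7 is the same as testing the sign of the
truncated i8. A quick Python spot check (not part of the patch):

    M = 0xFF00                                  # the 65280 mask from @test1
    for a in range(0, 1 << 16, 251):            # sparse sample of 16-bit values
        for b in range(0, 1 << 16, 257):
            assert ((a & M) != (b & M)) == (((a ^ b) & M) != 0)

    for a in range(256):                        # @test2/@test3: bit 7 is the sign of (i8)a
        signed_a8 = a - 256 if a >= 128 else a
        assert ((a & 128) == 0) == (signed_a8 > -1)
        assert ((a & 128) != 0) == (signed_a8 < 0)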
diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll
new file mode 100644
index 00000000000..dd51c6548ee
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll
@@ -0,0 +1,1584 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1738(double %x, double %y) {
+; CHECK-LABEL: @PR1738(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp1 = fcmp ord double %x, 0.0
+ %cmp2 = fcmp ord double %y, 0.0
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @PR1738_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %cmp1 = fcmp ord <2 x double> %x, <double 0.0, double undef>
+ %cmp2 = fcmp ord <2 x double> %y, <double undef, double 0.0>
+ %or = and <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %or
+}
+
+define i1 @PR41069(i1 %z, float %c, float %d) {
+; CHECK-LABEL: @PR41069(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ord1 = fcmp arcp ord float %c, 0.0
+ %and = and i1 %ord1, %z
+ %ord2 = fcmp afn ord float %d, 0.0
+ %r = and i1 %and, %ord2
+ ret i1 %r
+}
+
+define i1 @PR41069_commute(i1 %z, float %c, float %d) {
+; CHECK-LABEL: @PR41069_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ord1 = fcmp ninf ord float %c, 0.0
+ %and = and i1 %ord1, %z
+ %ord2 = fcmp ninf reassoc ord float %d, 0.0
+ %r = and i1 %ord2, %and
+ ret i1 %r
+}
+
+; Commute differently and make sure vectors work.
+
+define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: @PR41069_vec(
+; CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ord1 = fcmp ord <2 x double> %a, %b
+ %ord2 = fcmp ord <2 x double> %c, <double 0.0, double undef>
+ %and = and <2 x i1> %ord1, %ord2
+ %ord3 = fcmp ord <2 x double> %d, zeroinitializer
+ %r = and <2 x i1> %and, %ord3
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @PR41069_vec_commute(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: @PR41069_vec_commute(
+; CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ord1 = fcmp ord <2 x double> %a, %b
+ %ord2 = fcmp ord <2 x double> %c, <double 0.0, double undef>
+ %and = and <2 x i1> %ord1, %ord2
+ %ord3 = fcmp ord <2 x double> %d, zeroinitializer
+ %r = and <2 x i1> %ord3, %and
+ ret <2 x i1> %r
+}
+
+define i1 @PR15737(float %a, double %b) {
+; CHECK-LABEL: @PR15737(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double [[B:%.*]], 0.000000e+00
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %cmp = fcmp ord float %a, 0.000000e+00
+ %cmp1 = fcmp ord double %b, 0.000000e+00
+ %and = and i1 %cmp, %cmp1
+ ret i1 %and
+}
+
+define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord <2 x double> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret <2 x i1> [[AND]]
+;
+ %cmp = fcmp ord <2 x float> %a, zeroinitializer
+ %cmp1 = fcmp ord <2 x double> %b, zeroinitializer
+ %and = and <2 x i1> %cmp, %cmp1
+ ret <2 x i1> %and
+}
+
+define i1 @fcmp_ord_nonzero(float %x, float %y) {
+; CHECK-LABEL: @fcmp_ord_nonzero(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp1 = fcmp ord float %x, 1.0
+ %cmp2 = fcmp ord float %y, 2.0
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+define <3 x i1> @fcmp_ord_nonzero_vec(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: @fcmp_ord_nonzero_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <3 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %cmp1 = fcmp ord <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %cmp2 = fcmp ord <3 x float> %y, <float 3.0, float 2.0, float 1.0>
+ %and = and <3 x i1> %cmp1, %cmp2
+ ret <3 x i1> %and
+}
+
+define i1 @auto_gen_0(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_0(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp false double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_1(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_1(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_2(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_3(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_3(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_4(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_4(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_5(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_5(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_6(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_6(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_7(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_7(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_8(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_8(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_9(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_9(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_10(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_10(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_11(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_11(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_12(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_12(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_13(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_13(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_14(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_14(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_15(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_15(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_16(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_16(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_17(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_17(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_18(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_18(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_19(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_19(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_20(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_20(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_21(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_21(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_22(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_22(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_23(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_23(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_24(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_24(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_25(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_25(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_26(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_26(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_27(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_27(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_28(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_28(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_29(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_29(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_30(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_30(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_31(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_31(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_32(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_32(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_33(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_33(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_34(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_34(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_35(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_35(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_36(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_36(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_37(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_37(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_38(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_38(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_39(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_39(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_40(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_40(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_41(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_41(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_42(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_42(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_43(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_43(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_44(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_44(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_45(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_45(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_46(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_46(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_47(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_47(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_48(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_48(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_49(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_49(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_50(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_50(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_51(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_51(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_52(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_52(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_53(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_53(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_54(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_54(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_55(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_55(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_56(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_56(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_57(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_57(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_58(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_58(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_59(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_59(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_60(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_60(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_61(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_61(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_62(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_62(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_63(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_63(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_64(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_64(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_65(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_65(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_66(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_66(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_67(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_67(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_68(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_68(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_69(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_69(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_70(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_70(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_71(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_71(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_72(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_72(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_73(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_73(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_74(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_74(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_75(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_75(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_76(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_76(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_77(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_77(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_78(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_78(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_79(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_79(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_80(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_80(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_81(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_81(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_82(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_82(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_83(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_83(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_84(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_84(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_85(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_85(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_86(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_86(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_87(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_87(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_88(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_88(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_89(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_89(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_90(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_90(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_91(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_91(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_92(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_92(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_93(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_93(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_94(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_94(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_95(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_95(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_96(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_96(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_97(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_97(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_98(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_98(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_99(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_99(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_100(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_100(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_101(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_101(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_102(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_102(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_103(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_103(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_104(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_104(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_105(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_105(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_106(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_106(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_107(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_107(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_108(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_108(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_109(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_109(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_110(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_110(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_111(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_111(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_112(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_112(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_113(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_113(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_114(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_114(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_115(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_115(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_116(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_116(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_117(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_117(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_118(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_118(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_119(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_119(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_120(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_120(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_121(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_121(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_122(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_122(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_123(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_123(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_124(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_124(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_125(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_125(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_126(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_126(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_127(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_127(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_128(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_128(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_129(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_129(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_130(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_130(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_131(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_131(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_132(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_132(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_133(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_133(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_134(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_134(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_135(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_135(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp true double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
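All of the auto_gen_* expectations above follow one rule: an fcmp predicate is a 4-bit
mask of the relations it accepts, and ANDing two fcmps of the same operands gives the
fcmp whose mask is the bitwise AND of the two. A Python sketch (not part of the patch),
assuming the usual 0..15 predicate ordering (FCMP_FALSE .. FCMP_TRUE):

    PRED = ["false", "oeq", "ogt", "oge", "olt", "ole", "one", "ord",
            "uno",   "ueq", "ugt", "uge", "ult", "ule", "une", "true"]

    def and_fcmp(p1, p2):
        # Bitwise AND of the two 4-bit condition codes picks the expected predicate.
        return PRED[PRED.index(p1) & PRED.index(p2)]

    assert and_fcmp("ugt", "ueq") == "uno"    # @auto_gen_53
    assert and_fcmp("one", "oge") == "ogt"    # @auto_gen_24
    assert and_fcmp("ule", "uge") == "ueq"    # @auto_gen_88
    assert and_fcmp("ord", "ole") == "ole"    # @auto_gen_33
    assert and_fcmp("true", "une") == "une"   # @auto_gen_133
    assert and_fcmp("olt", "ogt") == "false"  # @auto_gen_12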
diff --git a/llvm/test/Transforms/InstCombine/and-narrow.ll b/llvm/test/Transforms/InstCombine/and-narrow.ll
new file mode 100644
index 00000000000..a8661a9f796
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-narrow.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -data-layout="n8:16:32" -S | FileCheck %s
+; RUN: opt < %s -instcombine -data-layout="n16" -S | FileCheck %s
+
+; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792
+
+define i16 @zext_add(i8 %x) {
+; CHECK-LABEL: @zext_add(
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 44
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = add i16 %z, 44
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define i16 @zext_sub(i8 %x) {
+; CHECK-LABEL: @zext_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i8 -5, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = sub i16 -5, %z
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define i16 @zext_mul(i8 %x) {
+; CHECK-LABEL: @zext_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i8 [[X:%.*]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = mul i16 %z, 3
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define i16 @zext_lshr(i8 %x) {
+; CHECK-LABEL: @zext_lshr(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = lshr i16 %z, 4
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define i16 @zext_ashr(i8 %x) {
+; CHECK-LABEL: @zext_ashr(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = ashr i16 %z, 2
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define i16 @zext_shl(i8 %x) {
+; CHECK-LABEL: @zext_shl(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %z = zext i8 %x to i16
+ %b = shl i16 %z, 3
+ %r = and i16 %b, %z
+ ret i16 %r
+}
+
+define <2 x i16> @zext_add_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_add_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 44, i8 42>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = add <2 x i16> %z, <i16 44, i16 42>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_sub_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_sub_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> <i8 -5, i8 -4>, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = sub <2 x i16> <i16 -5, i16 -4>, %z
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_mul_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_mul_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = mul <2 x i16> %z, <i16 3, i16 -2>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_lshr_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_lshr_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = lshr <2 x i16> %z, <i16 4, i16 2>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_ashr_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_ashr_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 2, i8 3>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = ashr <2 x i16> %z, <i16 2, i16 3>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_shl_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_shl_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = shl <2 x i16> %z, <i16 3, i16 2>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+; Don't create poison by narrowing a shift below the shift amount.
+
+define <2 x i16> @zext_lshr_vec_overshift(<2 x i8> %x) {
+; CHECK-LABEL: @zext_lshr_vec_overshift(
+; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[Z]], <i16 4, i16 8>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[B]], [[Z]]
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = lshr <2 x i16> %z, <i16 4, i16 8>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
+; Don't create poison by narrowing a shift below the shift amount.
+
+define <2 x i16> @zext_shl_vec_overshift(<2 x i8> %x) {
+; CHECK-LABEL: @zext_shl_vec_overshift(
+; CHECK-NEXT: [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i16> [[Z]], <i16 8, i16 2>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[B]], [[Z]]
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %z = zext <2 x i8> %x to <2 x i16>
+ %b = shl <2 x i16> %z, <i16 8, i16 2>
+ %r = and <2 x i16> %b, %z
+ ret <2 x i16> %r
+}
+
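The narrowing expected above is straightforward to confirm by brute force, since only
the low 8 bits of the wide result survive the 'and' with the zext. A quick Python check
(not part of the patch):

    def wide(x, op):     # original form: op in i16, then 'and' with zext(x)
        return (op(x) & 0xFFFF) & x

    def narrow(x, op):   # transformed form: op in i8, 'and' with x, then zext
        return (op(x) & 0xFF) & x

    ops = [lambda v: v + 44, lambda v: -5 - v, lambda v: v * 3,
           lambda v: v >> 4, lambda v: v << 3]
    for x in range(256):
        for op in ops:
            assert wide(x, op) == narrow(x, op)
    # The two *_overshift cases are deliberately excluded: a shift amount of 8 is
    # not valid on i8, so narrowing them would create poison.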
diff --git a/llvm/test/Transforms/InstCombine/and-or-and.ll b/llvm/test/Transforms/InstCombine/and-or-and.ll
new file mode 100644
index 00000000000..34cad82f4f1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-or-and.ll
@@ -0,0 +1,61 @@
+; If we have an 'and' of the result of an 'or', and one of the 'or' operands
+; cannot have contributed any of the resultant bits, delete the or. This
+; occurs for very common C/C++ code like this:
+;
+; struct foo { int A : 16; int B : 16; };
+; void test(struct foo *F, int X, int Y) {
+; F->A = X; F->B = Y;
+; }
+;
+; Which corresponds to test1.
+
+; RUN: opt < %s -instcombine -S | \
+; RUN: not grep "or "
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %A = and i32 %X, 7 ; <i32> [#uses=1]
+ %B = and i32 %Y, 8 ; <i32> [#uses=1]
+ %C = or i32 %A, %B ; <i32> [#uses=1]
+ ;; This cannot include any bits from %Y!
+ %D = and i32 %C, 7 ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @test2(i32 %X, i8 %Y) {
+ %B = zext i8 %Y to i32 ; <i32> [#uses=1]
+ %C = or i32 %X, %B ; <i32> [#uses=1]
+ ;; This cannot include any bits from %Y!
+ %D = and i32 %C, 65536 ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+ %B = shl i32 %Y, 1 ; <i32> [#uses=1]
+ %C = or i32 %X, %B ; <i32> [#uses=1]
+ ;; This cannot include any bits from %Y!
+ %D = and i32 %C, 1 ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+ %B = lshr i32 %Y, 31 ; <i32> [#uses=1]
+ %C = or i32 %X, %B ; <i32> [#uses=1]
+ ;; This cannot include any bits from %Y!
+ %D = and i32 %C, 2 ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @or_test1(i32 %X, i32 %Y) {
+ %A = and i32 %X, 1 ; <i32> [#uses=1]
+ ;; This cannot include any bits from X!
+ %B = or i32 %A, 1 ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i8 @or_test2(i8 %X, i8 %Y) {
+ %A = shl i8 %X, 7 ; <i8> [#uses=1]
+ ;; This cannot include any bits from X!
+ %B = or i8 %A, -128 ; <i8> [#uses=1]
+ ret i8 %B
+}
+
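The demanded-bits reasoning in the comments above can be spot-checked directly; the
masked 'or' operand never reaches the result. A quick Python check (not part of the
patch; test4 is modelled at 8 bits instead of 32):

    for X in range(256):
        for Y in range(256):
            assert (((X & 7) | (Y & 8)) & 7) == (X & 7)      # @test1
            assert ((X | (Y << 1)) & 1) == (X & 1)           # @test3
            assert ((X | (Y >> 7)) & 2) == (X & 2)           # @test4, 8-bit model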
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
new file mode 100644
index 00000000000..516235f94a3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1817_1(i32 %X) {
+; CHECK-LABEL: @PR1817_1(
+; CHECK-NEXT: [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = icmp slt i32 %X, 10
+ %B = icmp ult i32 %X, 10
+ %C = and i1 %A, %B
+ ret i1 %C
+}
+
+define i1 @PR1817_2(i32 %X) {
+; CHECK-LABEL: @PR1817_2(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT: ret i1 [[A]]
+;
+ %A = icmp slt i32 %X, 10
+ %B = icmp ult i32 %X, 10
+ %C = or i1 %A, %B
+ ret i1 %C
+}
+
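+; Both values are below 8 iff their 'or' is below 8, because neither value may
+; have a bit set at or above bit 3.
+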
+define i1 @PR2330(i32 %a, i32 %b) {
+; CHECK-LABEL: @PR2330(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ult i32 %a, 8
+ %cmp2 = icmp ult i32 %b, 8
+ %and = and i1 %cmp2, %cmp1
+ ret i1 %and
+}
+
+; if LHSC and RHSC differ only by one bit:
+; (X == C1 || X == C2) -> (X | (C1 ^ C2)) == C2
+; PR14708: https://bugs.llvm.org/show_bug.cgi?id=14708
+
+define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i32 %x, 50
+ %cmp2 = icmp eq i32 %x, 51
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+; (X != C1 && X != C2) -> (X | (C1 ^ C2)) != C2
+
+define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ne i32 %x, 51
+ %cmp2 = icmp ne i32 %x, 50
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; The constants are not necessarily off-by-one, just off-by-one-bit.
+
+define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i32 %x, 97
+ %cmp2 = icmp eq i32 %x, 65
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ne i19 %x, 65
+ %cmp2 = icmp ne i19 %x, 193
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; Make sure the constants are treated as unsigned when comparing them.
+
+define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i8 %x, 254
+ %cmp2 = icmp eq i8 %x, 126
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ne i8 %x, 65
+ %cmp2 = icmp ne i8 %x, 193
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; Use an 'add' to eliminate an icmp if the constants are off-by-one (not off-by-one-bit).
+; (X == 13 | X == 14) -> X-13 <u 2
+
+define i1 @or_eq_with_diff_one(i8 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one(
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i8 %x, 13
+ %cmp2 = icmp eq i8 %x, 14
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+; (X != 40 & X != 39) -> X-39 >u 1
+
+define i1 @and_ne_with_diff_one(i32 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ne i32 %x, 40
+ %cmp2 = icmp ne i32 %x, 39
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; Make sure the constants are treated as signed when comparing them.
+; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524
+
+define i1 @or_eq_with_diff_one_signed(i32 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one_signed(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i32 %x, 0
+ %cmp2 = icmp eq i32 %x, -1
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define i1 @and_ne_with_diff_one_signed(i64 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_signed(
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ne i64 %x, -1
+ %cmp2 = icmp ne i64 %x, 0
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; Vectors with splat constants get the same folds.
+
+define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>
+ %cmp2 = icmp eq <2 x i32> %x, <i32 65, i32 65>
+ %or = or <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %or
+}
+
+define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %cmp1 = icmp ne <2 x i32> %x, <i32 40, i32 40>
+ %cmp2 = icmp ne <2 x i32> %x, <i32 39, i32 39>
+ %and = and <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %and
+}
+
+; This is a fuzzer-generated test that would assert because
+; we'd get into foldAndOfICmps() without running InstSimplify
+; on an 'and' that should have been killed. It's not obvious
+; why, but removing anything hides the bug, hence the long test.
+
+define void @simplify_before_foldAndOfICmps() {
+; CHECK-LABEL: @simplify_before_foldAndOfICmps(
+; CHECK-NEXT: [[A8:%.*]] = alloca i16, align 2
+; CHECK-NEXT: [[L7:%.*]] = load i16, i16* [[A8]], align 2
+; CHECK-NEXT: [[C10:%.*]] = icmp ult i16 [[L7]], 2
+; CHECK-NEXT: [[C7:%.*]] = icmp slt i16 [[L7]], 0
+; CHECK-NEXT: [[C18:%.*]] = or i1 [[C7]], [[C10]]
+; CHECK-NEXT: [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64
+; CHECK-NEXT: [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP1]]
+; CHECK-NEXT: store i16 [[L7]], i16* undef, align 2
+; CHECK-NEXT: store i1 [[C18]], i1* undef, align 1
+; CHECK-NEXT: store i1* [[G26]], i1** undef, align 8
+; CHECK-NEXT: ret void
+;
+ %A8 = alloca i16
+ %L7 = load i16, i16* %A8
+ %G21 = getelementptr i16, i16* %A8, i8 -1
+ %B11 = udiv i16 %L7, -1
+ %G4 = getelementptr i16, i16* %A8, i16 %B11
+ %L2 = load i16, i16* %G4
+ %L = load i16, i16* %G4
+ %B23 = mul i16 %B11, %B11
+ %L4 = load i16, i16* %A8
+ %B21 = sdiv i16 %L7, %L4
+ %B7 = sub i16 0, %B21
+ %B18 = mul i16 %B23, %B7
+ %C10 = icmp ugt i16 %L, %B11
+ %B20 = and i16 %L7, %L2
+ %B1 = mul i1 %C10, true
+ %C5 = icmp sle i16 %B21, %L
+ %C11 = icmp ule i16 %B21, %L
+ %C7 = icmp slt i16 %B20, 0
+ %B29 = srem i16 %L4, %B18
+ %B15 = add i1 %C7, %C10
+ %B19 = add i1 %C11, %B15
+ %C6 = icmp sge i1 %C11, %B19
+ %B33 = or i16 %B29, %L4
+ %C13 = icmp uge i1 %C5, %B1
+ %C3 = icmp ult i1 %C13, %C6
+ store i16 undef, i16* %G21
+ %C18 = icmp ule i1 %C10, %C7
+ %G26 = getelementptr i1, i1* null, i1 %C3
+ store i16 %B33, i16* undef
+ store i1 %C18, i1* undef
+ store i1* %G26, i1** undef
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/and-or-not.ll b/llvm/test/Transforms/InstCombine/and-or-not.ll
new file mode 100644
index 00000000000..7bd4ad7b3bb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-or-not.ll
@@ -0,0 +1,642 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1510
+
+; (a | b) & ~(a & b) --> a ^ b
+
+define i32 @and_to_xor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor1(
+; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[AND2]]
+;
+ %or = or i32 %a, %b
+ %and = and i32 %a, %b
+ %not = xor i32 %and, -1
+ %and2 = and i32 %or, %not
+ ret i32 %and2
+}
+
+; ~(a & b) & (a | b) --> a ^ b
+
+define i32 @and_to_xor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor2(
+; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[AND2]]
+;
+ %or = or i32 %a, %b
+ %and = and i32 %a, %b
+ %not = xor i32 %and, -1
+ %and2 = and i32 %not, %or
+ ret i32 %and2
+}
+
+; (a | b) & ~(b & a) --> a ^ b
+
+define i32 @and_to_xor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor3(
+; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[AND2]]
+;
+ %or = or i32 %a, %b
+ %and = and i32 %b, %a
+ %not = xor i32 %and, -1
+ %and2 = and i32 %or, %not
+ ret i32 %and2
+}
+
+; ~(a & b) & (b | a) --> a ^ b
+
+define i32 @and_to_xor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor4(
+; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[AND2]]
+;
+ %or = or i32 %b, %a
+ %and = and i32 %a, %b
+ %not = xor i32 %and, -1
+ %and2 = and i32 %not, %or
+ ret i32 %and2
+}
+
+define <4 x i32> @and_to_xor1_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @and_to_xor1_vec(
+; CHECK-NEXT: [[AND2:%.*]] = xor <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[AND2]]
+;
+ %or = or <4 x i32> %a, %b
+ %and = and <4 x i32> %a, %b
+ %not = xor <4 x i32> %and, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %and2 = and <4 x i32> %or, %not
+ ret <4 x i32> %and2
+}
+
+; In the next 4 tests, cast instructions are used to thwart operand complexity
+; canonicalizations, so we can test all of the commuted patterns.
+
+; (a | ~b) & (~a | b) --> ~(a ^ b)
+
+define i32 @and_to_nxor1(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor1(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %notb
+ %or2 = or i32 %nota, %b
+ %and = and i32 %or1, %or2
+ ret i32 %and
+}
+
+; (a | ~b) & (b | ~a) --> ~(a ^ b)
+
+define i32 @and_to_nxor2(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor2(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %notb
+ %or2 = or i32 %b, %nota
+ %and = and i32 %or1, %or2
+ ret i32 %and
+}
+
+; (~a | b) & (a | ~b) --> ~(a ^ b)
+
+define i32 @and_to_nxor3(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor3(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %b
+ %or2 = or i32 %a, %notb
+ %and = and i32 %or1, %or2
+ ret i32 %and
+}
+
+; (~a | b) & (~b | a) --> ~(a ^ b)
+
+define i32 @and_to_nxor4(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor4(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %b
+ %or2 = or i32 %notb, %a
+ %and = and i32 %or1, %or2
+ ret i32 %and
+}
+
+; (a & ~b) | (~a & b) --> a ^ b
+
+define i32 @or_to_xor1(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor1(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %a, %notb
+ %and2 = and i32 %nota, %b
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+; (a & ~b) | (b & ~a) --> a ^ b
+
+define i32 @or_to_xor2(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor2(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %a, %notb
+ %and2 = and i32 %b, %nota
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+; (~a & b) | (~b & a) --> a ^ b
+
+define i32 @or_to_xor3(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor3(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %nota, %b
+ %and2 = and i32 %notb, %a
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+; (~a & b) | (a & ~b) --> a ^ b
+
+define i32 @or_to_xor4(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor4(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %nota, %b
+ %and2 = and i32 %a, %notb
+ %or = or i32 %and1, %and2
+ ret i32 %or
+}
+
+; (a & b) | ~(a | b) --> ~(a ^ b)
+
+define i32 @or_to_nxor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[OR2]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %a, %b
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %and, %notor
+ ret i32 %or2
+}
+
+; (a & b) | ~(b | a) --> ~(a ^ b)
+
+define i32 @or_to_nxor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor2(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[OR2]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %b, %a
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %and, %notor
+ ret i32 %or2
+}
+
+; ~(a | b) | (a & b) --> ~(a ^ b)
+
+define i32 @or_to_nxor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor3(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[OR2]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %a, %b
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %notor, %and
+ ret i32 %or2
+}
+
+; ~(a | b) | (b & a) --> ~(a ^ b)
+
+define i32 @or_to_nxor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor4(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[OR2]]
+;
+ %and = and i32 %b, %a
+ %or = or i32 %a, %b
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %notor, %and
+ ret i32 %or2
+}
+
+; (a & b) ^ (a | b) --> a ^ b
+
+define i32 @xor_to_xor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor1(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %a, %b
+ %xor = xor i32 %and, %or
+ ret i32 %xor
+}
+
+; (a & b) ^ (b | a) --> a ^ b
+
+define i32 @xor_to_xor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor2(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %b, %a
+ %xor = xor i32 %and, %or
+ ret i32 %xor
+}
+
+; (a | b) ^ (a & b) --> a ^ b
+
+define i32 @xor_to_xor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor3(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %or = or i32 %a, %b
+ %and = and i32 %a, %b
+ %xor = xor i32 %or, %and
+ ret i32 %xor
+}
+
+; (a | b) ^ (b & a) --> a ^ b
+
+define i32 @xor_to_xor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor4(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %or = or i32 %a, %b
+ %and = and i32 %b, %a
+ %xor = xor i32 %or, %and
+ ret i32 %xor
+}
+
+; (a | ~b) ^ (~a | b) --> a ^ b
+
+; In the next 8 tests, cast instructions are used to thwart operand complexity
+; canonicalizations, so we can test all of the commuted patterns.
+
+define i32 @xor_to_xor5(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor5(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %notb
+ %or2 = or i32 %nota, %b
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (a | ~b) ^ (b | ~a) --> a ^ b
+
+define i32 @xor_to_xor6(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor6(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %notb
+ %or2 = or i32 %b, %nota
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (~a | b) ^ (a | ~b) --> a ^ b
+
+define i32 @xor_to_xor7(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor7(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %notb
+ %or2 = or i32 %nota, %b
+ %xor = xor i32 %or2, %or1
+ ret i32 %xor
+}
+
+; (~a | b) ^ (~b | a) --> a ^ b
+
+define i32 @xor_to_xor8(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor8(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %notb, %a
+ %or2 = or i32 %nota, %b
+ %xor = xor i32 %or2, %or1
+ ret i32 %xor
+}
+
+; (a & ~b) ^ (~a & b) --> a ^ b
+
+define i32 @xor_to_xor9(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor9(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %a, %notb
+ %and2 = and i32 %nota, %b
+ %xor = xor i32 %and1, %and2
+ ret i32 %xor
+}
+
+; (a & ~b) ^ (b & ~a) --> a ^ b
+
+define i32 @xor_to_xor10(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor10(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %a, %notb
+ %and2 = and i32 %b, %nota
+ %xor = xor i32 %and1, %and2
+ ret i32 %xor
+}
+
+; (~a & b) ^ (a & ~b) --> a ^ b
+
+define i32 @xor_to_xor11(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor11(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %a, %notb
+ %and2 = and i32 %nota, %b
+ %xor = xor i32 %and2, %and1
+ ret i32 %xor
+}
+
+; (~a & b) ^ (~b & a) --> a ^ b
+
+define i32 @xor_to_xor12(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor12(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %and1 = and i32 %notb, %a
+ %and2 = and i32 %nota, %b
+ %xor = xor i32 %and2, %and1
+ ret i32 %xor
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=32830
+; Make sure we're matching operands correctly and not folding things wrongly.
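+; Here the second 'or' uses %c rather than %b, so this is not the
+; (a | ~b) & (~a | b) pattern and must not be folded to ~(a ^ b).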
+
+define i64 @PR32830(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @PR32830(
+; CHECK-NEXT: [[NOTA:%.*]] = xor i64 [[A:%.*]], -1
+; CHECK-NEXT: [[NOTB:%.*]] = xor i64 [[B:%.*]], -1
+; CHECK-NEXT: [[OR1:%.*]] = or i64 [[NOTB]], [[A]]
+; CHECK-NEXT: [[OR2:%.*]] = or i64 [[NOTA]], [[C:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[OR1]], [[OR2]]
+; CHECK-NEXT: ret i64 [[AND]]
+;
+ %nota = xor i64 %a, -1
+ %notb = xor i64 %b, -1
+ %or1 = or i64 %notb, %a
+ %or2 = or i64 %nota, %c
+ %and = and i64 %or1, %or2
+ ret i64 %and
+}
+
+; (~a | b) & (~b | a) --> ~(a ^ b)
+; TODO: this increases instruction count if the pieces have additional users
+define i32 @and_to_nxor_multiuse(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor_multiuse(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOTB]], [[A]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], [[AND]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %b
+ %or2 = or i32 %notb, %a
+ %and = and i32 %or1, %or2
+ %mul1 = mul i32 %or1, %or2 ; here to increase the use count of the inputs to the and
+ %mul2 = mul i32 %mul1, %and
+ ret i32 %mul2
+}
+
+; (a & b) | ~(a | b) --> ~(a ^ b)
+; TODO: this increases instruction count if the pieces have additional users
+define i32 @or_to_nxor_multiuse(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor_multiuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %a, %b
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %and, %notor
+ %mul1 = mul i32 %and, %notor ; here to increase the use count of the inputs to the or
+ %mul2 = mul i32 %mul1, %or2
+ ret i32 %mul2
+}
+
+; (a | b) ^ (~a | ~b) --> ~(a ^ b)
+define i32 @xor_to_xnor1(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor1(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %b
+ %or2 = or i32 %nota, %notb
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (a | b) ^ (~b | ~a) --> ~(a ^ b)
+define i32 @xor_to_xnor2(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor2(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %a, %b
+ %or2 = or i32 %notb, %nota
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (~a | ~b) ^ (a | b) --> ~(a ^ b)
+define i32 @xor_to_xnor3(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor3(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %notb
+ %or2 = or i32 %a, %b
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; (~a | ~b) ^ (b | a) --> ~(a ^ b)
+define i32 @xor_to_xnor4(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor4(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %notb
+ %or2 = or i32 %b, %a
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
new file mode 100644
index 00000000000..fa8e158fb47
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
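+; In the next 4 tests, ((a | b) & 1) | (b & -2) --> (a & 1) | b: the low bit may
+; come from either operand, and the remaining bits of %b pass through unchanged.
+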
+define i32 @func1(i32 %a, i32 %b) {
+; CHECK-LABEL: @func1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %b, %a
+ %tmp1 = and i32 %tmp, 1
+ %tmp2 = and i32 %b, -2
+ %tmp3 = or i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @func2(i32 %a, i32 %b) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %a, %b
+ %tmp1 = and i32 1, %tmp
+ %tmp2 = and i32 -2, %b
+ %tmp3 = or i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @func3(i32 %a, i32 %b) {
+; CHECK-LABEL: @func3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %b, %a
+ %tmp1 = and i32 %tmp, 1
+ %tmp2 = and i32 %b, -2
+ %tmp3 = or i32 %tmp2, %tmp1
+ ret i32 %tmp3
+}
+
+define i32 @func4(i32 %a, i32 %b) {
+; CHECK-LABEL: @func4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %a, %b
+ %tmp1 = and i32 1, %tmp
+ %tmp2 = and i32 -2, %b
+ %tmp3 = or i32 %tmp2, %tmp1
+ ret i32 %tmp3
+}
+
+; Check variants of:
+; and ({x}or X, Y), C --> {x}or X, (and Y, C)
+; ...in the following 5 tests.
+
+define i8 @and_or_hoist_mask(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_or_hoist_mask(
+; CHECK-NEXT: [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT: [[B_MASKED:%.*]] = and i8 %b, 3
+; CHECK-NEXT: [[AND:%.*]] = or i8 [[SH]], [[B_MASKED]]
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %sh = lshr i8 %a, 6
+ %or = or i8 %sh, %b
+ %and = and i8 %or, 3
+ ret i8 %and
+}
+
+define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @and_xor_hoist_mask_vec_splat(
+; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> %a, <i8 6, i8 6>
+; CHECK-NEXT: [[B_MASKED:%.*]] = and <2 x i8> %b, <i8 3, i8 3>
+; CHECK-NEXT: [[AND:%.*]] = xor <2 x i8> [[SH]], [[B_MASKED]]
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %sh = lshr <2 x i8> %a, <i8 6, i8 6>
+ %xor = xor <2 x i8> %sh, %b
+ %and = and <2 x i8> %xor, <i8 3, i8 3>
+ ret <2 x i8> %and
+}
+
+define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_xor_hoist_mask_commute(
+; CHECK-NEXT: [[C:%.*]] = mul i8 %b, 43
+; CHECK-NEXT: [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT: [[C_MASKED:%.*]] = and i8 [[C]], 3
+; CHECK-NEXT: [[AND:%.*]] = xor i8 [[C_MASKED]], [[SH]]
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %c = mul i8 %b, 43 ; thwart complexity-based ordering
+ %sh = lshr i8 %a, 6
+ %xor = xor i8 %c, %sh
+ %and = and i8 %xor, 3
+ ret i8 %and
+}
+
+define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat(
+; CHECK-NEXT: [[C:%.*]] = mul <2 x i8> %b, <i8 43, i8 43>
+; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> %a, <i8 6, i8 6>
+; CHECK-NEXT: [[C_MASKED:%.*]] = and <2 x i8> [[C]], <i8 3, i8 3>
+; CHECK-NEXT: [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]]
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %c = mul <2 x i8> %b, <i8 43, i8 43> ; thwart complexity-based ordering
+ %sh = lshr <2 x i8> %a, <i8 6, i8 6>
+ %or = or <2 x i8> %c, %sh
+ %and = and <2 x i8> %or, <i8 3, i8 3>
+ ret <2 x i8> %and
+}
+
+; Don't transform if the 'or' has multiple uses because that would increase instruction count.
+
+define i8 @and_or_do_not_hoist_mask(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_or_do_not_hoist_mask(
+; CHECK-NEXT: [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[SH]], %b
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[OR]], 3
+; CHECK-NEXT: [[EXTRA_USE_OF_OR:%.*]] = mul i8 [[OR]], [[AND]]
+; CHECK-NEXT: ret i8 [[EXTRA_USE_OF_OR]]
+;
+ %sh = lshr i8 %a, 6
+ %or = or i8 %sh, %b
+ %and = and i8 %or, 3
+ %extra_use_of_or = mul i8 %or, %and
+ ret i8 %extra_use_of_or
+}
+
diff --git a/llvm/test/Transforms/InstCombine/and-xor-merge.ll b/llvm/test/Transforms/InstCombine/and-xor-merge.ll
new file mode 100644
index 00000000000..b9a6a536ce7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-xor-merge.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (x&z) ^ (y&z) -> (x^y)&z
+define i32 @test1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %tmp61 = xor i32 %x, %y
+; CHECK-NEXT: %tmp7 = and i32 %tmp61, %z
+; CHECK-NEXT: ret i32 %tmp7
+ %tmp3 = and i32 %z, %x
+ %tmp6 = and i32 %z, %y
+ %tmp7 = xor i32 %tmp3, %tmp6
+ ret i32 %tmp7
+}
+
+; (x & y) ^ (x|y) -> x^y
+define i32 @test2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %tmp7 = xor i32 %y, %x
+; CHECK-NEXT: ret i32 %tmp7
+ %tmp3 = and i32 %y, %x
+ %tmp6 = or i32 %y, %x
+ %tmp7 = xor i32 %tmp3, %tmp6
+ ret i32 %tmp7
+}
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
new file mode 100644
index 00000000000..1eb871e594c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -0,0 +1,343 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; a & (a ^ b) --> a & ~b
+
+define i32 @and_xor_common_op(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %a, %b
+ %r = and i32 %a, %xor
+ ret i32 %r
+}
+
+; a & (b ^ a) --> a & ~b
+
+define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute1(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %b, %a
+ %r = and i32 %a, %xor
+ ret i32 %r
+}
+
+; (b ^ a) & a --> a & ~b
+
+define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute2(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+ %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+ %xor = xor i32 %b, %a
+ %r = and i32 %xor, %a
+ ret i32 %r
+}
+
+; (a ^ b) & a --> a & ~b
+
+define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute3(
+; CHECK-NEXT: [[A:%.*]] = udiv <2 x i32> <i32 42, i32 43>, [[PA:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> <i32 43, i32 42>, [[PB:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[B]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %a = udiv <2 x i32> <i32 42, i32 43>, %pa ; thwart complexity-based canonicalization
+ %b = udiv <2 x i32> <i32 43, i32 42>, %pb ; thwart complexity-based canonicalization
+ %xor = xor <2 x i32> %a, %b
+ %r = and <2 x i32> %xor, %a
+ ret <2 x i32> %r
+}
+
+; It's ok to match a common constant.
+; TODO: The xor should be a 'not' op (-1 constant), but demanded bits shrinks it.
+
+define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) {
+; CHECK-LABEL: @and_xor_common_op_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
+ %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
+ ret <4 x i32> %2
+}
+
+; a & (a ^ ~b) --> a & b
+
+define i32 @and_xor_not_common_op(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_xor_not_common_op(
+; CHECK-NEXT: [[T4:%.*]] = and i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %b2 = xor i32 %b, -1
+ %t2 = xor i32 %a, %b2
+ %t4 = and i32 %t2, %a
+ ret i32 %t4
+}
+
+; rdar://10770603
+; (x & y) + (x ^ y) -> x | y
+
+define i64 @or(i64 %x, i64 %y) {
+; CHECK-LABEL: @or(
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %1 = and i64 %y, %x
+ %2 = xor i64 %y, %x
+ %3 = add i64 %1, %2
+ ret i64 %3
+}
+
+; (x & y) | (x ^ y) -> x | y
+
+define i64 @or2(i64 %x, i64 %y) {
+; CHECK-LABEL: @or2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %1 = and i64 %y, %x
+ %2 = xor i64 %y, %x
+ %3 = or i64 %1, %2
+ ret i64 %3
+}
+
+; PR37098 - https://bugs.llvm.org/show_bug.cgi?id=37098
+; Reassociate bitwise logic to eliminate a shift.
+; There are 4 commuted * 3 shift ops * 3 logic ops = 36 potential variations of this fold.
+; Mix the commutation options to provide coverage using fewer tests.
+
+define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @and_shl(
+; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = shl i8 %x, %shamt
+ %sy = shl i8 %y, %shamt
+ %a = and i8 %sx, %z
+ %r = and i8 %sy, %a
+ ret i8 %r
+}
+
+define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_shl(
+; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = shl i8 %x, %shamt
+ %sy = shl i8 %y, %shamt
+ %a = or i8 %sx, %z
+ %r = or i8 %a, %sy
+ ret i8 %r
+}
+
+define i8 @xor_shl(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @xor_shl(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[Z]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+ %sx = shl i8 %x, %shamt
+ %sy = shl i8 %y, %shamt
+ %a = xor i8 %z, %sx
+ %r = xor i8 %a, %sy
+ ret i8 %r
+}
+
+define i8 @and_lshr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @and_lshr(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+ %sx = lshr i8 %x, %shamt
+ %sy = lshr i8 %y, %shamt
+ %a = and i8 %z, %sx
+ %r = and i8 %sy, %a
+ ret i8 %r
+}
+
+define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr(
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = lshr i8 %x, %shamt
+ %sy = lshr i8 %y, %shamt
+ %a = or i8 %sx, %z
+ %r = or i8 %sy, %a
+ ret i8 %r
+}
+
+define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @xor_lshr(
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = lshr i8 %x, %shamt
+ %sy = lshr i8 %y, %shamt
+ %a = xor i8 %sx, %z
+ %r = xor i8 %a, %sy
+ ret i8 %r
+}
+
+define i8 @and_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @and_ashr(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[Z]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+ %sx = ashr i8 %x, %shamt
+ %sy = ashr i8 %y, %shamt
+ %a = and i8 %z, %sx
+ %r = and i8 %a, %sy
+ ret i8 %r
+}
+
+define i8 @or_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @or_ashr(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[Z]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+ %sx = ashr i8 %x, %shamt
+ %sy = ashr i8 %y, %shamt
+ %a = or i8 %z, %sx
+ %r = or i8 %sy, %a
+ ret i8 %r
+}
+
+define <2 x i8> @xor_ashr(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z, <2 x i8> %shamt) {
+; CHECK-LABEL: @xor_ashr(
+; CHECK-NEXT: [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %sx = ashr <2 x i8> %x, %shamt
+ %sy = ashr <2 x i8> %y, %shamt
+ %a = xor <2 x i8> %sx, %z
+ %r = xor <2 x i8> %a, %sy
+ ret <2 x i8> %r
+}
+
+; Negative test - different logic ops
+
+define i8 @or_and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_and_shl(
+; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = shl i8 %x, %shamt
+ %sy = shl i8 %y, %shamt
+ %a = or i8 %sx, %z
+ %r = and i8 %sy, %a
+ ret i8 %r
+}
+
+; Negative test - different shift ops
+
+define i8 @or_lshr_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr_shl(
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = lshr i8 %x, %shamt
+ %sy = shl i8 %y, %shamt
+ %a = or i8 %sx, %z
+ %r = or i8 %a, %sy
+ ret i8 %r
+}
+
+; Negative test - different shift amounts
+
+define i8 @or_lshr_shamt2(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr_shamt2(
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], 5
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sx = lshr i8 %x, 5
+ %sy = lshr i8 %y, %shamt
+ %a = or i8 %sx, %z
+ %r = or i8 %sy, %a
+ ret i8 %r
+}
+
+; Negative test - multi-use
+
+define i8 @xor_lshr_multiuse(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @xor_lshr_multiuse(
+; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[A]], [[R]]
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %sx = lshr i8 %x, %shamt
+ %sy = lshr i8 %y, %shamt
+ %a = xor i8 %sx, %z
+ %r = xor i8 %a, %sy
+ %r2 = sdiv i8 %a, %r
+ ret i8 %r2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
new file mode 100644
index 00000000000..4925013b195
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -0,0 +1,839 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Most of these tests should have no 'and' instructions left after instcombine.
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 0
+;
+ %B = and i32 %A, 0
+ ret i32 %B
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 %A
+;
+ %B = and i32 %A, -1
+ ret i32 %B
+}
+
+define i1 @test3(i1 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i1 %A, false
+ ret i1 %B
+}
+
+define i1 @test4(i1 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i1 %A
+;
+ %B = and i1 %A, true
+ ret i1 %B
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i32 %A
+;
+ %B = and i32 %A, %A
+ ret i32 %B
+}
+
+define i1 @test6(i1 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i1 %A
+;
+ %B = and i1 %A, %A
+ ret i1 %B
+}
+
+; A & ~A == 0
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i32 0
+;
+ %NotA = xor i32 %A, -1
+ %B = and i32 %A, %NotA
+ ret i32 %B
+}
+
+; AND associates
+define i8 @test8(i8 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i8 0
+;
+ %B = and i8 %A, 3
+ %C = and i8 %B, 4
+ ret i8 %C
+}
+
+; Test of sign bit, convert to (icmp slt %A, 0)
+define i1 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -2147483648
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
+
+; Test of sign bit, convert to (icmp slt %A, 0)
+define i1 @test9a(i32 %A) {
+; CHECK-LABEL: @test9a(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -2147483648
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
+
+define i32 @test10(i32 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 1
+;
+ %B = and i32 %A, 12
+ %C = xor i32 %B, 15
+ ; (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ %D = and i32 %C, 1
+ ret i32 %D
+}
+
+define i32 @test11(i32 %A, i32* %P) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B:%.*]] = or i32 %A, 3
+; CHECK-NEXT: [[C:%.*]] = xor i32 [[B]], 12
+; CHECK-NEXT: store i32 [[C]], i32* %P, align 4
+; CHECK-NEXT: ret i32 3
+;
+ %B = or i32 %A, 3
+ %C = xor i32 %B, 12
+ ; additional use of C
+ store i32 %C, i32* %P
+  ; %D = and i32 %C, 3 --> 3
+ %D = and i32 %C, 3
+ ret i32 %D
+}
+
+define i1 @test12(i32 %A, i32 %B) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %A, %B
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ule i32 %A, %B
+ ; (A < B) & (A <= B) === (A < B)
+ %D = and i1 %C1, %C2
+ ret i1 %D
+}
+
+define i1 @test13(i32 %A, i32 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i1 false
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ugt i32 %A, %B
+ ; (A < B) & (A > B) === false
+ %D = and i1 %C1, %C2
+ ret i1 %D
+}
+
+define i1 @test14(i8 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i8 %A, -128
+ %C = icmp ne i8 %B, 0
+ ret i1 %C
+}
+
+define i8 @test15(i8 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: ret i8 0
+;
+ %B = lshr i8 %A, 7
+ ; Always equals zero
+ %C = and i8 %B, 2
+ ret i8 %C
+}
+
+define i8 @test16(i8 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i8 0
+;
+ %B = shl i8 %A, 2
+ %C = and i8 %B, 3
+ ret i8 %C
+}
+
+define i1 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %A, 127
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -128
+  ;; true iff %A >=u 128
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
+
+define <2 x i1> @test18_vec(<2 x i32> %A) {
+; CHECK-LABEL: @test18_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> %A, <i32 127, i32 127>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = and <2 x i32> %A, <i32 -128, i32 -128>
+ %C = icmp ne <2 x i32> %B, zeroinitializer
+ ret <2 x i1> %C
+}
+
+define i1 @test18a(i8 %A) {
+; CHECK-LABEL: @test18a(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %A, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i8 %A, -2
+ %C = icmp eq i8 %B, 0
+ ret i1 %C
+}
+
+define <2 x i1> @test18a_vec(<2 x i8> %A) {
+; CHECK-LABEL: @test18a_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> %A, <i8 2, i8 2>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = and <2 x i8> %A, <i8 -2, i8 -2>
+ %C = icmp eq <2 x i8> %B, zeroinitializer
+ ret <2 x i1> %C
+}
+
+define i32 @test19(i32 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %A, 3
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = shl i32 %A, 3
+ ;; Clearing a zero bit
+ %C = and i32 %B, -2
+ ret i32 %C
+}
+
+define i8 @test20(i8 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[C:%.*]] = lshr i8 %A, 7
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %C = lshr i8 %A, 7
+ ;; Unneeded
+ %D = and i8 %C, 1
+ ret i8 %D
+}
+
+define i1 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %A, 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sgt i32 %A, 1
+ %C = icmp sle i32 %A, 2
+ %D = and i1 %B, %C
+ ret i1 %D
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test23vec(<2 x i32> %A) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i32> %A, <i32 1, i32 1>
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> %A, <i32 3, i32 3>
+; CHECK-NEXT: [[D:%.*]] = and <2 x i1> [[B]], [[C]]
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = icmp sgt <2 x i32> %A, <i32 1, i32 1>
+ %C = icmp sle <2 x i32> %A, <i32 2, i32 2>
+ %D = and <2 x i1> %B, %C
+ ret <2 x i1> %D
+}
+
+define i1 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 %A, 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sgt i32 %A, 1
+ %C = icmp ne i32 %A, 2
+ ;; A > 2
+ %D = and i1 %B, %C
+ ret i1 %D
+}
+
+define i1 @test25(i32 %A) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 %A, -50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 50
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sge i32 %A, 50
+ %C = icmp slt i32 %A, 100
+ %D = and i1 %B, %C
+ ret i1 %D
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test25vec(<2 x i32> %A) {
+; CHECK-LABEL: @test25vec(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i32> %A, <i32 49, i32 49>
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> %A, <i32 100, i32 100>
+; CHECK-NEXT: [[D:%.*]] = and <2 x i1> [[B]], [[C]]
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = icmp sge <2 x i32> %A, <i32 50, i32 50>
+ %C = icmp slt <2 x i32> %A, <i32 100, i32 100>
+ %D = and <2 x i1> %B, %C
+ ret <2 x i1> %D
+}
+
+define i8 @test27(i8 %A) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: ret i8 0
+;
+ %B = and i8 %A, 4
+ %C = sub i8 %B, 16
+ ;; 0xF0
+ %D = and i8 %C, -16
+ %E = add i8 %D, 16
+ ret i8 %E
+}
+
+;; This is just a zero-extending shr.
+define i32 @test28(i32 %X) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[Y1:%.*]] = lshr i32 %X, 24
+; CHECK-NEXT: ret i32 [[Y1]]
+;
+ ;; Sign extend
+ %Y = ashr i32 %X, 24
+ ;; Mask out sign bits
+ %Z = and i32 %Y, 255
+ ret i32 %Z
+}
+
+define i32 @test29(i8 %X) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[Y:%.*]] = zext i8 %X to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %Y = zext i8 %X to i32
+ ;; Zero extend makes this unneeded.
+ %Z = and i32 %Y, 255
+ ret i32 %Z
+}
+
+define i32 @test30(i1 %X) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %Y = zext i1 %X to i32
+ %Z = and i32 %Y, 1
+ ret i32 %Z
+}
+
+define i32 @test31(i1 %X) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: [[Z:%.*]] = shl nuw nsw i32 [[Y]], 4
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = zext i1 %X to i32
+ %Z = shl i32 %Y, 4
+ %A = and i32 %Z, 16
+ ret i32 %A
+}
+
+; Demanded bit analysis allows us to eliminate the add.
+
+define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @and_demanded_bits_splat_vec(
+; CHECK-NEXT: [[Z:%.*]] = and <2 x i32> %x, <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[Z]]
+;
+ %y = add <2 x i32> %x, <i32 8, i32 8>
+ %z = and <2 x i32> %y, <i32 7, i32 7>
+ ret <2 x i32> %z
+}
+
+; zext (x >> 8) has all zeros in the high 24-bits: 0x000000xx
+; (y | 255) has all ones in the low 8-bits: 0xyyyyyyff
+; 'and' of those is all known bits - it's just 'z'.
+
+define i32 @and_zext_demanded(i16 %x, i32 %y) {
+; CHECK-LABEL: @and_zext_demanded(
+; CHECK-NEXT: [[S:%.*]] = lshr i16 %x, 8
+; CHECK-NEXT: [[Z:%.*]] = zext i16 [[S]] to i32
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %s = lshr i16 %x, 8
+ %z = zext i16 %s to i32
+ %o = or i32 %y, 255
+ %a = and i32 %o, %z
+ ret i32 %a
+}
+
+define i32 @test32(i32 %In) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: ret i32 0
+;
+ %Y = and i32 %In, 16
+ %Z = lshr i32 %Y, 2
+ %A = and i32 %Z, 1
+ ret i32 %A
+}
+
+;; Code corresponding to one-bit bitfield ^1.
+define i32 @test33(i32 %b) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[TMP_13:%.*]] = xor i32 %b, 1
+; CHECK-NEXT: ret i32 [[TMP_13]]
+;
+ %tmp.4.mask = and i32 %b, 1
+ %tmp.10 = xor i32 %tmp.4.mask, 1
+ %tmp.12 = and i32 %b, -2
+ %tmp.13 = or i32 %tmp.12, %tmp.10
+ ret i32 %tmp.13
+}
+
+define i32 @test33b(i32 %b) {
+; CHECK-LABEL: @test33b(
+; CHECK-NEXT: [[TMP_13:%.*]] = xor i32 [[B:%.*]], 1
+; CHECK-NEXT: ret i32 [[TMP_13]]
+;
+ %tmp.4.mask = and i32 %b, 1
+ %tmp.10 = xor i32 %tmp.4.mask, 1
+ %tmp.12 = and i32 %b, -2
+ %tmp.13 = or i32 %tmp.10, %tmp.12
+ ret i32 %tmp.13
+}
+
+define <2 x i32> @test33vec(<2 x i32> %b) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT: [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[TMP_13]]
+;
+ %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+ %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+ %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+ %tmp.13 = or <2 x i32> %tmp.12, %tmp.10
+ ret <2 x i32> %tmp.13
+}
+
+define <2 x i32> @test33vecb(<2 x i32> %b) {
+; CHECK-LABEL: @test33vecb(
+; CHECK-NEXT: [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[TMP_13]]
+;
+ %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+ %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+ %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+ %tmp.13 = or <2 x i32> %tmp.10, %tmp.12
+ ret <2 x i32> %tmp.13
+}
+
+define i32 @test34(i32 %A, i32 %B) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i32 %B
+;
+ %tmp.2 = or i32 %B, %A
+ %tmp.4 = and i32 %tmp.2, %B
+ ret i32 %tmp.4
+}
+
+; FIXME: This test should only need -instsimplify (ValueTracking / computeKnownBits), not -instcombine.
+
+define <2 x i32> @PR24942(<2 x i32> %x) {
+; CHECK-LABEL: @PR24942(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+ %lshr = lshr <2 x i32> %x, <i32 31, i32 31>
+ %and = and <2 x i32> %lshr, <i32 2, i32 2>
+ ret <2 x i32> %and
+}
+
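+; In the next 5 tests, the mask demands only bits that fit in the source type,
+; so the binop on the zext'd value is performed in i32 and the zext is moved
+; after the 'and'. In test38 and test39 the xor/or constant (7) does not
+; overlap the mask (240), so that operation disappears entirely.
+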
+define i64 @test35(i32 %X) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: %[[sub:.*]] = sub i32 0, %X
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT: %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT: ret i64 %[[cst]]
+ %zext = zext i32 %X to i64
+ %zsub = sub i64 0, %zext
+ %res = and i64 %zsub, 240
+ ret i64 %res
+}
+
+define i64 @test36(i32 %X) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: %[[sub:.*]] = add i32 %X, 7
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT: %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT: ret i64 %[[cst]]
+ %zext = zext i32 %X to i64
+ %zsub = add i64 %zext, 7
+ %res = and i64 %zsub, 240
+ ret i64 %res
+}
+
+define i64 @test37(i32 %X) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: %[[sub:.*]] = mul i32 %X, 7
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT: %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT: ret i64 %[[cst]]
+ %zext = zext i32 %X to i64
+ %zsub = mul i64 %zext, 7
+ %res = and i64 %zsub, 240
+ ret i64 %res
+}
+
+define i64 @test38(i32 %X) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT: %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT: ret i64 %[[cst]]
+ %zext = zext i32 %X to i64
+ %zsub = xor i64 %zext, 7
+ %res = and i64 %zsub, 240
+ ret i64 %res
+}
+
+define i64 @test39(i32 %X) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT: %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT: ret i64 %[[cst]]
+ %zext = zext i32 %X to i64
+ %zsub = or i64 %zext, 7
+ %res = and i64 %zsub, 240
+ ret i64 %res
+}
+
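+; The 'and' is folded into both arms of the select (and into the phi values in
+; the tests below): 1000 & 123 == 104 and 10 & 123 == 10.
+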
+define i32 @test40(i1 %C) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], i32 104, i32 10
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = and i32 %A, 123
+ ret i32 %V
+}
+
+define <2 x i32> @test40vec(i1 %C) {
+; CHECK-LABEL: @test40vec(
+; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 104>, <2 x i32> <i32 10, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = and <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %V
+}
+
+define <2 x i32> @test40vec2(i1 %C) {
+; CHECK-LABEL: @test40vec2(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 324>, <2 x i32> <i32 10, i32 12>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+ %V = and <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %V
+}
+
+define i32 @test41(i1 %which) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 104, [[ENTRY:%.*]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = and i32 %A, 123
+ ret i32 %value
+}
+
+define <2 x i32> @test41vec(i1 %which) {
+; CHECK-LABEL: @test41vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 104>, [[ENTRY:%.*]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = and <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test41vec2(i1 %which) {
+; CHECK-LABEL: @test41vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 324>, [[ENTRY:%.*]] ], [ <i32 10, i32 12>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = and <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %value
+}
+
+define i32 @test42(i32 %a, i32 %c, i32 %d) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[FORCE:%.*]] = mul i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %force = mul i32 %c, %d ; forces the complexity sorting
+ %or = or i32 %a, %force
+ %nota = xor i32 %a, -1
+ %xor = xor i32 %nota, %force
+ %and = and i32 %xor, %or
+ ret i32 %and
+}
+
+define i32 @test43(i32 %a, i32 %c, i32 %d) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[FORCE:%.*]] = mul i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %force = mul i32 %c, %d ; forces the complexity sorting
+ %or = or i32 %a, %force
+ %nota = xor i32 %a, -1
+ %xor = xor i32 %nota, %force
+ %and = and i32 %or, %xor
+ ret i32 %and
+}
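+
+; Reasoning sketch for the two tests above: xor'ing ~a with force gives ~(a ^ force),
+; which is true exactly in the bits where a == force; and'ing that with (a | force)
+; keeps only the bits where both are set, i.e. force & a.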
+
+; (~y | x) & y -> x & y
+define i32 @test44(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %n, %x
+ %a = and i32 %o, %y
+ ret i32 %a
+}
+
+; (x | ~y) & y -> x & y
+define i32 @test45(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %x, %n
+ %a = and i32 %o, %y
+ ret i32 %a
+}
+
+; y & (~y | x) -> x & y
+define i32 @test46(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %n, %x
+ %a = and i32 %y, %o
+ ret i32 %a
+}
+
+; y & (x | ~y) -> x & y
+define i32 @test47(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %x, %n
+ %a = and i32 %y, %o
+ ret i32 %a
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (X & (Y | ~X)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @and_orn_cmp_1(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sgt i32 %a, %b
+ %x_inv = icmp sle i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x_inv
+ %and = and i1 %x, %or
+ ret i1 %and
+}
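+
+; Reasoning sketch: when X is true, (Y | ~X) reduces to Y, and when X is false the
+; whole 'and' is false anyway, so X & (Y | ~X) == X & Y. Here ~X is spelled as the
+; inverted predicate (sle is the negation of sgt).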
+
+; Commute the 'and':
+; ((Y | ~X) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <2 x i1> @and_orn_cmp_2(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @and_orn_cmp_2(
+; CHECK-NEXT: [[X:%.*]] = icmp sge <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <2 x i32> [[C:%.*]], <i32 42, i32 47>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[AND]]
+;
+ %x = icmp sge <2 x i32> %a, %b
+ %x_inv = icmp slt <2 x i32> %a, %b
+ %y = icmp ugt <2 x i32> %c, <i32 42, i32 47> ; thwart complexity-based ordering
+ %or = or <2 x i1> %y, %x_inv
+ %and = and <2 x i1> %or, %x
+ ret <2 x i1> %and
+}
+
+; Commute the 'or':
+; (X & (~X | Y)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_3(i72 %a, i72 %b, i72 %c) {
+; CHECK-LABEL: @and_orn_cmp_3(
+; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp ugt i72 %a, %b
+ %x_inv = icmp ule i72 %a, %b
+ %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %x_inv, %y
+ %and = and i1 %x, %or
+ ret i1 %and
+}
+
+; Commute the 'and':
+; ((~X | Y) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <3 x i1> @and_orn_cmp_4(<3 x i32> %a, <3 x i32> %b, <3 x i32> %c) {
+; CHECK-LABEL: @and_orn_cmp_4(
+; CHECK-NEXT: [[X:%.*]] = icmp eq <3 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <3 x i32> [[C:%.*]], <i32 42, i32 43, i32 -1>
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[AND]]
+;
+ %x = icmp eq <3 x i32> %a, %b
+ %x_inv = icmp ne <3 x i32> %a, %b
+ %y = icmp ugt <3 x i32> %c, <i32 42, i32 43, i32 -1> ; thwart complexity-based ordering
+ %or = or <3 x i1> %x_inv, %y
+ %and = and <3 x i1> %or, %x
+ ret <3 x i1> %and
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (~X & (Y | X)) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_1(i37 %a, i37 %b, i37 %c) {
+; CHECK-LABEL: @andn_or_cmp_1(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X_INV]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sgt i37 %a, %b
+ %x_inv = icmp sle i37 %a, %b
+ %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x
+ %and = and i1 %x_inv, %or
+ ret i1 %and
+}
+
+; Commute the 'and':
+; ((Y | X) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_2(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: @andn_or_cmp_2(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sge i16 %a, %b
+ %x_inv = icmp slt i16 %a, %b
+ %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x
+ %and = and i1 %or, %x_inv
+ ret i1 %and
+}
+
+; Commute the 'or':
+; (~X & (X | Y)) -> (~X & Y), where 'not' is an inverted cmp
+
+define <4 x i1> @andn_or_cmp_3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @andn_or_cmp_3(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <4 x i32> [[C:%.*]], <i32 42, i32 0, i32 1, i32 -1>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i1> [[X_INV]], [[Y]]
+; CHECK-NEXT: ret <4 x i1> [[AND]]
+;
+ %x = icmp ugt <4 x i32> %a, %b
+ %x_inv = icmp ule <4 x i32> %a, %b
+ %y = icmp ugt <4 x i32> %c, <i32 42, i32 0, i32 1, i32 -1> ; thwart complexity-based ordering
+ %or = or <4 x i1> %x, %y
+ %and = and <4 x i1> %x_inv, %or
+ ret <4 x i1> %and
+}
+
+; Commute the 'and':
+; ((X | Y) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @andn_or_cmp_4(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp eq i32 %a, %b
+ %x_inv = icmp ne i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %x, %y
+ %and = and i1 %or, %x_inv
+ ret i1 %and
+}
diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll
new file mode 100644
index 00000000000..7d056266440
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/and2.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test2(i1 %X, i1 %Y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = and i1 %X, %Y
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = and i1 %X, %Y
+ %b = and i1 %a, %X
+ ret i1 %b
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[B:%.*]] = and i32 %X, %Y
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = and i32 %X, %Y
+ %b = and i32 %Y, %a
+ ret i32 %b
+}
+
+define i1 @test7(i32 %i, i1 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], %b
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp slt i32 %i, 1
+ %cmp2 = icmp sgt i32 %i, -1
+ %and1 = and i1 %cmp1, %b
+ %and2 = and i1 %and1, %cmp2
+ ret i1 %and2
+}
+
+define i1 @test8(i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[I_OFF:%.*]] = add i32 %i, -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[I_OFF]], 13
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp1 = icmp ne i32 %i, 0
+ %cmp2 = icmp ult i32 %i, 14
+ %cond = and i1 %cmp1, %cmp2
+ ret i1 %cond
+}
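+
+; Range sketch for the fold above: i != 0 && i u< 14 holds exactly for i in [1, 13],
+; and subtracting 1 shifts that window to [0, 12], i.e. (i - 1) u< 13.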
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test8vec(<2 x i32> %i) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne <2 x i32> %i, zeroinitializer
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i32> %i, <i32 14, i32 14>
+; CHECK-NEXT: [[COND:%.*]] = and <2 x i1> [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret <2 x i1> [[COND]]
+;
+ %cmp1 = icmp ne <2 x i32> %i, zeroinitializer
+ %cmp2 = icmp ult <2 x i32> %i, <i32 14, i32 14>
+ %cond = and <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %cond
+}
+
+; combine -x & 1 into x & 1
+define i64 @test9(i64 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT: ret i64 [[AND]]
+;
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ ret i64 %and
+}
+
+; combine -x & 1 into x & 1
+define <2 x i64> @test9vec(<2 x i64> %x) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> %x, <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i64> [[AND]]
+;
+ %sub = sub nsw <2 x i64> <i64 0, i64 0>, %x
+ %and = and <2 x i64> %sub, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+define i64 @test10(i64 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT: [[ADD:%.*]] = sub i64 [[AND]], %x
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ %add = add i64 %sub, %and
+ ret i64 %add
+}
+
+; (1 << x) & 1 --> zext(x == 0)
+
+define i8 @and1_shl1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT: [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %sh = shl i8 1, %x
+ %and = and i8 %sh, 1
+ ret i8 %and
+}
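+
+; Sketch: (1 << x) has bit 0 set only when the shift amount is 0, so masking with 1
+; yields 1 exactly when x == 0, which is zext(x == 0).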
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_shl1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = shl i8 1, %x
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SH]], [[AND]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %sh = shl i8 1, %x
+ %and = and i8 %sh, 1
+ %add = add i8 %sh, %and
+ ret i8 %add
+}
+
+; (1 << x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_shl1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %sh = shl <2 x i8> <i8 1, i8 1>, %x
+ %and = and <2 x i8> %sh, <i8 1, i8 1>
+ ret <2 x i8> %and
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define i8 @and1_lshr1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT: [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %sh = lshr i8 1, %x
+ %and = and i8 %sh, 1
+ ret i8 %and
+}
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = lshr i8 1, %x
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i8 [[SH]], [[AND]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %sh = lshr i8 1, %x
+ %and = and i8 %sh, 1
+ %add = add i8 %sh, %and
+ ret i8 %add
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %sh = lshr <2 x i8> <i8 1, i8 1>, %x
+ %and = and <2 x i8> %sh, <i8 1, i8 1>
+ ret <2 x i8> %and
+}
+
+; The add in this test is unnecessary because the LSBs of the LHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT: [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT: ret i32 [[W]]
+;
+ %x = shl i32 %a, 8
+ %y = add i32 %x, %b
+ %z = and i32 %y, 128
+ %w = mul i32 %z, %x ; to keep the shift from being removed
+ ret i32 %w
+}
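+
+; Numeric illustration of the comment above: with a = 3 (so x = 768) and b = 200,
+; (768 + 200) & 128 = 968 & 128 = 128 = 200 & 128, so the add contributes nothing
+; to the masked bits and %b can be and'ed directly.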
+
+; The add in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT: [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT: ret i32 [[W]]
+;
+ %x = shl i32 %a, 8
+ %y = add i32 %b, %x
+ %z = and i32 %y, 128
+ %w = mul i32 %z, %x ; to keep the shift from being removed
+ ret i32 %w
+}
+
+; The sub in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT: [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT: ret i32 [[W]]
+;
+ %x = shl i32 %a, 8
+ %y = sub i32 %b, %x
+ %z = and i32 %y, 128
+ %w = mul i32 %z, %x ; to keep the shift from being removed
+ ret i32 %w
+}
+
+; The sub in this test cannot be removed because we need to keep the negation of %b. TODO: But we should be able to replace the LHS of it with a 0.
+define i32 @test14(i32 %a, i32 %b) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[Y]], 128
+; CHECK-NEXT: [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT: ret i32 [[W]]
+;
+ %x = shl i32 %a, 8
+ %y = sub i32 %x, %b
+ %z = and i32 %y, 128
+ %w = mul i32 %z, %x ; to keep the shift from being removed
+ ret i32 %w
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-add.ll b/llvm/test/Transforms/InstCombine/apint-add.ll
new file mode 100644
index 00000000000..c55fd0419a6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-add.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests for Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i1 @test1(i1 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 %x
+;
+ %tmp.2 = xor i1 %x, 1
+ %tmp.4 = add i1 %tmp.2, 1
+ ret i1 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i47 @test2(i47 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i47 %x
+;
+ %tmp.2 = xor i47 %x, 70368744177664
+ %tmp.4 = add i47 %tmp.2, 70368744177664
+ ret i47 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i15 @test3(i15 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i15 %x
+;
+ %tmp.2 = xor i15 %x, 16384
+ %tmp.4 = add i15 %tmp.2, 16384
+ ret i15 %tmp.4
+}
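+
+; Sketch of why these are no-ops: for i15, 16384 is the sign bit, and adding the
+; sign bit is the same as xor'ing it (any carry out of the top bit is discarded),
+; so xor 16384 followed by add 16384 cancels. E.g. x = 1: 1 ^ 16384 = 16385 and
+; 16385 + 16384 = 32769, which wraps to 1 modulo 2^15.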
+
+; X + signbit --> X ^ signbit
+define <2 x i5> @test3vec(<2 x i5> %x) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> %x, <i5 -16, i5 -16>
+; CHECK-NEXT: ret <2 x i5> [[Y]]
+;
+ %y = add <2 x i5> %x, <i5 16, i5 16>
+ ret <2 x i5> %y
+}
+
+;; (x & 0b1111..0) + 1 -> x | 1
+define i49 @test4(i49 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP_4:%.*]] = or i49 %x, 1
+; CHECK-NEXT: ret i49 [[TMP_4]]
+;
+ %tmp.2 = and i49 %x, 562949953421310
+ %tmp.4 = add i49 %tmp.2, 1
+ ret i49 %tmp.4
+}
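+
+; Sketch: 562949953421310 is 2^49 - 2, i.e. every i49 bit except bit 0, so the 'and'
+; only clears bit 0 and the '+ 1' cannot carry; the whole thing is just x | 1.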
+
+define i7 @sext(i4 %x) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT: [[ADD:%.*]] = sext i4 %x to i7
+; CHECK-NEXT: ret i7 [[ADD]]
+;
+ %xor = xor i4 %x, -8
+ %zext = zext i4 %xor to i7
+ %add = add nsw i7 %zext, -8
+ ret i7 %add
+}
+
+define <2 x i10> @sext_vec(<2 x i3> %x) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i3> %x to <2 x i10>
+; CHECK-NEXT: ret <2 x i10> [[ADD]]
+;
+ %xor = xor <2 x i3> %x, <i3 -4, i3 -4>
+ %zext = zext <2 x i3> %xor to <2 x i10>
+ %add = add nsw <2 x i10> %zext, <i10 -4, i10 -4>
+ ret <2 x i10> %add
+}
+
+; Multiple uses of the operands don't prevent the fold.
+
+define i4 @sext_multiuse(i4 %x) {
+; CHECK-LABEL: @sext_multiuse(
+; CHECK-NEXT: [[XOR:%.*]] = xor i4 %x, -8
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i4 [[XOR]] to i7
+; CHECK-NEXT: [[ADD:%.*]] = sext i4 %x to i7
+; CHECK-NEXT: [[MUL:%.*]] = sdiv i7 [[ZEXT]], [[ADD]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i7 [[MUL]] to i4
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i4 [[TRUNC]], [[XOR]]
+; CHECK-NEXT: ret i4 [[DIV]]
+;
+ %xor = xor i4 %x, -8
+ %zext = zext i4 %xor to i7
+ %add = add nsw i7 %zext, -8
+ %mul = sdiv i7 %zext, %add
+ %trunc = trunc i7 %mul to i4
+ %div = sdiv i4 %trunc, %xor
+ ret i4 %div
+}
+
+; Tests for Integer BitWidth > 64 && BitWidth <= 1024.
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i111 @test5(i111 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i111 %x
+;
+ %tmp.2 = shl i111 1, 110
+ %tmp.4 = xor i111 %x, %tmp.2
+ %tmp.6 = add i111 %tmp.4, %tmp.2
+ ret i111 %tmp.6
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i65 @test6(i65 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i65 %x
+;
+ %tmp.0 = shl i65 1, 64
+ %tmp.2 = xor i65 %x, %tmp.0
+ %tmp.4 = add i65 %tmp.2, %tmp.0
+ ret i65 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i1024 @test7(i1024 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i1024 %x
+;
+ %tmp.0 = shl i1024 1, 1023
+ %tmp.2 = xor i1024 %x, %tmp.0
+ %tmp.4 = add i1024 %tmp.2, %tmp.0
+ ret i1024 %tmp.4
+}
+
+;; If we have add(xor(X, 0xF..F80..), 0x80..), it's an xor.
+define i128 @test8(i128 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP_4:%.*]] = xor i128 %x, 170141183460469231731687303715884105600
+; CHECK-NEXT: ret i128 [[TMP_4]]
+;
+ %tmp.5 = shl i128 1, 127
+ %tmp.1 = ashr i128 %tmp.5, 120
+ %tmp.2 = xor i128 %x, %tmp.1
+ %tmp.4 = add i128 %tmp.2, %tmp.5
+ ret i128 %tmp.4
+}
+
+;; (x & C) + 1 -> (x & C) | 1 when the low bit of C is clear
+define i77 @test9(i77 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[TMP_2:%.*]] = and i77 %x, 562949953421310
+; CHECK-NEXT: [[TMP_4:%.*]] = or i77 [[TMP_2]], 1
+; CHECK-NEXT: ret i77 [[TMP_4]]
+;
+ %tmp.2 = and i77 %x, 562949953421310
+ %tmp.4 = add i77 %tmp.2, 1
+ ret i77 %tmp.4
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-and-compare.ll b/llvm/test/Transforms/InstCombine/apint-and-compare.ll
new file mode 100644
index 00000000000..53e591e69c9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-and-compare.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | grep and | count 2
+
+; Each function should be optimized down to one 'and'.
+define i1 @test1(i33 %a, i33 %b) {
+ %tmp1 = and i33 %a, 65280
+ %tmp3 = and i33 %b, 65280
+ %tmp = icmp ne i33 %tmp1, %tmp3
+ ret i1 %tmp
+}
+
+define i1 @test2(i999 %a, i999 %b) {
+ %tmp1 = and i999 %a, 65280
+ %tmp3 = and i999 %b, 65280
+ %tmp = icmp ne i999 %tmp1, %tmp3
+ ret i1 %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-and-or-and.ll b/llvm/test/Transforms/InstCombine/apint-and-or-and.ll
new file mode 100644
index 00000000000..43536d72e9b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-and-or-and.ll
@@ -0,0 +1,50 @@
+; If we have an 'and' of the result of an 'or', and one of the 'or' operands
+; cannot have contributed any of the resultant bits, delete the or. This
+; occurs for very common C/C++ code like this:
+;
+; struct foo { int A : 16; int B : 16; };
+; void test(struct foo *F, int X, int Y) {
+; F->A = X; F->B = Y;
+; }
+;
+; Which corresponds to test1.
+;
+; This tests arbitrary precision integers.
+
+; RUN: opt < %s -instcombine -S | not grep "or "
+; END.
+
+define i17 @test1(i17 %X, i17 %Y) {
+ %A = and i17 %X, 7
+ %B = and i17 %Y, 8
+ %C = or i17 %A, %B
+ %D = and i17 %C, 7 ;; This cannot include any bits from %Y!
+ ret i17 %D
+}
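+
+; Worked example for test1: with X = 5 and Y = 12, %A = 5, %B = 8, %C = 13 and
+; %D = 13 & 7 = 5, which is just X & 7; the bit contributed by %Y (bit 3) is
+; masked back off, so the 'or' can be dropped.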
+
+define i49 @test3(i49 %X, i49 %Y) {
+ %B = shl i49 %Y, 1
+ %C = or i49 %X, %B
+ %D = and i49 %C, 1 ;; This cannot include any bits from %Y!
+ ret i49 %D
+}
+
+define i67 @test4(i67 %X, i67 %Y) {
+ %B = lshr i67 %Y, 66
+ %C = or i67 %X, %B
+ %D = and i67 %C, 2 ;; This cannot include any bits from %Y!
+ ret i67 %D
+}
+
+define i231 @or_test1(i231 %X, i231 %Y) {
+ %A = and i231 %X, 1
+ %B = or i231 %A, 1 ;; This cannot include any bits from X!
+ ret i231 %B
+}
+
+define i7 @or_test2(i7 %X, i7 %Y) {
+ %A = shl i7 %X, 6
+ %B = or i7 %A, 64 ;; This cannot include any bits from X!
+ ret i7 %B
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-and-xor-merge.ll b/llvm/test/Transforms/InstCombine/apint-and-xor-merge.ll
new file mode 100644
index 00000000000..52633125048
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-and-xor-merge.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; This test case checks that the merge of and/xor can work on arbitrary
+; precision integers.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (x & z) ^ (y & z) -> (x ^ y) & z
+define i57 @test1(i57 %x, i57 %y, i57 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP61:%.*]] = xor i57 %x, %y
+; CHECK-NEXT: [[TMP7:%.*]] = and i57 [[TMP61]], %z
+; CHECK-NEXT: ret i57 [[TMP7]]
+;
+ %tmp3 = and i57 %z, %x
+ %tmp6 = and i57 %z, %y
+ %tmp7 = xor i57 %tmp3, %tmp6
+ ret i57 %tmp7
+}
+
+; (x & y) ^ (x | y) -> x ^ y
+define i23 @test2(i23 %x, i23 %y, i23 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP7:%.*]] = xor i23 %y, %x
+; CHECK-NEXT: ret i23 [[TMP7]]
+;
+ %tmp3 = and i23 %y, %x
+ %tmp6 = or i23 %y, %x
+ %tmp7 = xor i23 %tmp3, %tmp6
+ ret i23 %tmp7
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-and.ll b/llvm/test/Transforms/InstCombine/apint-and.ll
new file mode 100644
index 00000000000..f0381dfc284
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-and.ll
@@ -0,0 +1,126 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; FIXME: Some of these tests belong in InstSimplify.
+
+; Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+define i39 @test0(i39 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: ret i39 0
+;
+ %B = and i39 %A, 0 ; zero result
+ ret i39 %B
+}
+
+define i15 @test2(i15 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i15 %x
+;
+ %tmp.2 = and i15 %x, -1 ; noop
+ ret i15 %tmp.2
+}
+
+define i23 @test3(i23 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i23 0
+;
+ %tmp.0 = and i23 %x, 127
+ %tmp.2 = and i23 %tmp.0, 128
+ ret i23 %tmp.2
+}
+
+define i1 @test4(i37 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i37 %x, 2147483647
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = and i37 %x, -2147483648
+ %B = icmp ne i37 %A, 0
+ ret i1 %B
+}
+
+define i7 @test5(i7 %A, i7* %P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[B:%.*]] = or i7 %A, 3
+; CHECK-NEXT: [[C:%.*]] = xor i7 [[B]], 12
+; CHECK-NEXT: store i7 [[C]], i7* %P, align 1
+; CHECK-NEXT: ret i7 3
+;
+ %B = or i7 %A, 3
+ %C = xor i7 %B, 12
+ store i7 %C, i7* %P
+ %r = and i7 %C, 3
+ ret i7 %r
+}
+
+define i47 @test7(i47 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i47 %A, 39
+; CHECK-NEXT: ret i47 [[TMP1]]
+;
+ %X = ashr i47 %A, 39 ;; sign extend
+ %C1 = and i47 %X, 255
+ ret i47 %C1
+}
+
+; Integer BitWidth > 64 && BitWidth <= 1024.
+
+define i999 @test8(i999 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i999 0
+;
+ %B = and i999 %A, 0 ; zero result
+ ret i999 %B
+}
+
+define i1005 @test9(i1005 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i1005 %x
+;
+ %tmp.2 = and i1005 %x, -1 ; noop
+ ret i1005 %tmp.2
+}
+
+define i123 @test10(i123 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i123 0
+;
+ %tmp.0 = and i123 %x, 127
+ %tmp.2 = and i123 %tmp.0, 128
+ ret i123 %tmp.2
+}
+
+define i1 @test11(i737 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i737 %x, 2147483647
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = and i737 %x, -2147483648
+ %B = icmp ne i737 %A, 0
+ ret i1 %B
+}
+
+define i117 @test12(i117 %A, i117* %P) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[B:%.*]] = or i117 %A, 3
+; CHECK-NEXT: [[C:%.*]] = xor i117 [[B]], 12
+; CHECK-NEXT: store i117 [[C]], i117* %P, align 4
+; CHECK-NEXT: ret i117 3
+;
+ %B = or i117 %A, 3
+ %C = xor i117 %B, 12
+ store i117 %C, i117* %P
+ %r = and i117 %C, 3
+ ret i117 %r
+}
+
+define i1024 @test13(i1024 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i1024 %A, 1016
+; CHECK-NEXT: ret i1024 [[TMP1]]
+;
+ %X = ashr i1024 %A, 1016 ;; sign extend
+ %C1 = and i1024 %X, 255
+ ret i1024 %C1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-call-cast-target.ll b/llvm/test/Transforms/InstCombine/apint-call-cast-target.ll
new file mode 100644
index 00000000000..f3a66c32414
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-call-cast-target.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK: %[[call:.*]] = call i7* @ctime(i999* null)
+; CHECK: %[[cast:.*]] = ptrtoint i7* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
+entry:
+ %tmp = call i32 bitcast (i7* (i999*)* @ctime to i32 (i99*)*)( i99* null )
+ ret i32 %tmp
+}
+
+define i7* @ctime(i999*) {
+; CHECK-LABEL: define i7* @ctime(
+; CHECK: %[[call:.*]] = call i32 @main()
+; CHECK: %[[cast:.*]] = inttoptr i32 %[[call]] to i7*
+entry:
+ %tmp = call i7* bitcast (i32 ()* @main to i7* ()*)( )
+ ret i7* %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-cast-and-cast.ll b/llvm/test/Transforms/InstCombine/apint-cast-and-cast.ll
new file mode 100644
index 00000000000..251d78f59be
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-cast-and-cast.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | not grep bitcast
+
+define i19 @test1(i43 %val) {
+ %t1 = bitcast i43 %val to i43
+ %t2 = and i43 %t1, 1
+ %t3 = trunc i43 %t2 to i19
+ ret i19 %t3
+}
+
+define i73 @test2(i677 %val) {
+ %t1 = bitcast i677 %val to i677
+ %t2 = and i677 %t1, 1
+ %t3 = trunc i677 %t2 to i73
+ ret i73 %t3
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-cast-cast-to-and.ll b/llvm/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
new file mode 100644
index 00000000000..b2069a93ac4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | not grep i41
+
+define i61 @test1(i61 %X) {
+ %Y = trunc i61 %X to i41 ;; Turns into an AND
+ %Z = zext i41 %Y to i61
+ ret i61 %Z
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-cast.ll b/llvm/test/Transforms/InstCombine/apint-cast.ll
new file mode 100644
index 00000000000..85e7a4fca93
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-cast.ll
@@ -0,0 +1,30 @@
+; Tests to make sure elimination of casts is working correctly
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i17 @test1(i17 %a) {
+ %tmp = zext i17 %a to i37 ; <i37> [#uses=2]
+ %tmp21 = lshr i37 %tmp, 8 ; <i37> [#uses=1]
+; CHECK: %tmp21 = lshr i17 %a, 8
+ %tmp5 = shl i37 %tmp, 8 ; <i37> [#uses=1]
+; CHECK: %tmp5 = shl i17 %a, 8
+ %tmp.upgrd.32 = or i37 %tmp21, %tmp5 ; <i37> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i17 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i37 %tmp.upgrd.32 to i17 ; <i17> [#uses=1]
+ ret i17 %tmp.upgrd.3
+; CHECK: ret i17 %tmp.upgrd.32
+}
+
+define i167 @test2(i167 %a) {
+ %tmp = zext i167 %a to i577 ; <i577> [#uses=2]
+ %tmp21 = lshr i577 %tmp, 9 ; <i577> [#uses=1]
+; CHECK: %tmp21 = lshr i167 %a, 9
+ %tmp5 = shl i577 %tmp, 8 ; <i577> [#uses=1]
+; CHECK: %tmp5 = shl i167 %a, 8
+ %tmp.upgrd.32 = or i577 %tmp21, %tmp5 ; <i577> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i167 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i577 %tmp.upgrd.32 to i167 ; <i167> [#uses=1]
+ ret i167 %tmp.upgrd.3
+; CHECK: ret i167 %tmp.upgrd.32
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-div1.ll b/llvm/test/Transforms/InstCombine/apint-div1.ll
new file mode 100644
index 00000000000..68aadac1de4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-div1.ll
@@ -0,0 +1,22 @@
+; This test makes sure that div instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+;
+; RUN: opt < %s -instcombine -S | not grep div
+
+
+define i33 @test1(i33 %X) {
+ %Y = udiv i33 %X, 4096
+ ret i33 %Y
+}
+
+define i49 @test2(i49 %X) {
+ %tmp.0 = shl i49 4096, 17
+ %Y = udiv i49 %X, %tmp.0
+ ret i49 %Y
+}
+
+define i59 @test3(i59 %X, i1 %C) {
+ %V = select i1 %C, i59 1024, i59 4096
+ %R = udiv i59 %X, %V
+ ret i59 %R
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-div2.ll b/llvm/test/Transforms/InstCombine/apint-div2.ll
new file mode 100644
index 00000000000..2d7ac78a210
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-div2.ll
@@ -0,0 +1,22 @@
+; This test makes sure that div instructions are properly eliminated.
+; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
+;
+; RUN: opt < %s -instcombine -S | not grep div
+
+
+define i333 @test1(i333 %X) {
+ %Y = udiv i333 %X, 70368744177664
+ ret i333 %Y
+}
+
+define i499 @test2(i499 %X) {
+ %tmp.0 = shl i499 4096, 197
+ %Y = udiv i499 %X, %tmp.0
+ ret i499 %Y
+}
+
+define i599 @test3(i599 %X, i1 %C) {
+ %V = select i1 %C, i599 70368744177664, i599 4096
+ %R = udiv i599 %X, %V
+ ret i599 %R
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-mul1.ll b/llvm/test/Transforms/InstCombine/apint-mul1.ll
new file mode 100644
index 00000000000..93fa5b0504c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-mul1.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that mul instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+
+define i17 @test1(i17 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y:%.*]] = shl i17 [[X:%.*]], 10
+; CHECK-NEXT: ret i17 [[Y]]
+;
+ %Y = mul i17 %X, 1024
+ ret i17 %Y
+}
+
+define <2 x i17> @test2(<2 x i17> %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], <i17 10, i17 10>
+; CHECK-NEXT: ret <2 x i17> [[Y]]
+;
+ %Y = mul <2 x i17> %X, <i17 1024, i17 1024>
+ ret <2 x i17> %Y
+}
+
+define <2 x i17> @test3(<2 x i17> %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i17> [[X:%.*]], <i17 10, i17 8>
+; CHECK-NEXT: ret <2 x i17> [[Y]]
+;
+ %Y = mul <2 x i17> %X, <i17 1024, i17 256>
+ ret <2 x i17> %Y
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-mul2.ll b/llvm/test/Transforms/InstCombine/apint-mul2.ll
new file mode 100644
index 00000000000..16239ec3fcd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-mul2.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that mul instructions are properly eliminated.
+; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
+
+define i177 @test1(i177 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y:%.*]] = shl i177 [[X:%.*]], 155
+; CHECK-NEXT: ret i177 [[Y]]
+;
+ %C = shl i177 1, 155
+ %Y = mul i177 %X, %C
+ ret i177 %Y
+}
+
+define <2 x i177> @test2(<2 x i177> %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], <i177 155, i177 155>
+; CHECK-NEXT: ret <2 x i177> [[Y]]
+;
+ %C = shl <2 x i177> <i177 1, i177 1>, <i177 155, i177 155>
+ %Y = mul <2 x i177> %X, %C
+ ret <2 x i177> %Y
+}
+
+define <2 x i177> @test3(<2 x i177> %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i177> [[X:%.*]], <i177 150, i177 155>
+; CHECK-NEXT: ret <2 x i177> [[Y]]
+;
+ %C = shl <2 x i177> <i177 1, i177 1>, <i177 150, i177 155>
+ %Y = mul <2 x i177> %X, %C
+ ret <2 x i177> %Y
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-not.ll b/llvm/test/Transforms/InstCombine/apint-not.ll
new file mode 100644
index 00000000000..c5b12fd5dee
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-not.ll
@@ -0,0 +1,17 @@
+; This test makes sure that the xor instructions are properly eliminated
+; when arbitrary precision integers are used.
+
+; RUN: opt < %s -instcombine -S | not grep xor
+
+define i33 @test1(i33 %A) {
+ %B = xor i33 %A, -1
+ %C = xor i33 %B, -1
+ ret i33 %C
+}
+
+define i1 @test2(i52 %A, i52 %B) {
+ %cond = icmp ule i52 %A, %B ; Can change into uge
+ %Ret = xor i1 %cond, true
+ ret i1 %Ret
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-or.ll b/llvm/test/Transforms/InstCombine/apint-or.ll
new file mode 100644
index 00000000000..33304bf8ad6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-or.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These tests are for Integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i23 @test1(i23 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i23 -1, %A
+ %B = or i23 %A, %NotA
+ ret i23 %B
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i23 -1
+}
+
+define i39 @test2(i39 %V, i39 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+ %N = and i39 %M, 274877906944
+ %A = add i39 %V, %N
+ %B = and i39 %A, %C1
+ %D = and i39 %V, 274877906943
+ %R = or i39 %B, %D
+ ret i39 %R
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %N = and i39 %M, -274877906944
+; CHECK-NEXT: %A = add i39 %N, %V
+; CHECK-NEXT: ret i39 %A
+}
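+
+; Worked constants for test2: 274877906943 is 2^38 - 1, so C2 covers the low 38
+; bits and C1 = ~C2 is just bit 38. %N can only have bit 38 set, hence (N & C2) == 0
+; and adding N to V never disturbs the low 38 bits; the masked halves reassemble
+; to exactly V + N.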
+
+; These tests are for Integer BitWidth > 64 && BitWidth <= 1024.
+define i1023 @test4(i1023 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i1023 -1, %A
+ %B = or i1023 %A, %NotA
+ ret i1023 %B
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret i1023 -1
+}
+
+define i399 @test5(i399 %V, i399 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+ %N = and i399 %M, 18446742974197923840
+ %A = add i399 %V, %N
+ %B = and i399 %A, %C1
+ %D = and i399 %V, 274877906943
+ %R = or i399 %B, %D
+ ret i399 %R
+; CHECK-LABEL: @test5
+; CHECK-NEXT: %N = and i399 %M, 18446742974197923840
+; CHECK-NEXT: %A = add i399 %N, %V
+; CHECK-NEXT: ret i399 %A
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-rem1.ll b/llvm/test/Transforms/InstCombine/apint-rem1.ll
new file mode 100644
index 00000000000..030faccee8b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-rem1.ll
@@ -0,0 +1,22 @@
+; This test makes sure that these instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+;
+; RUN: opt < %s -instcombine -S | not grep rem
+
+
+define i33 @test1(i33 %A) {
+ %B = urem i33 %A, 4096
+ ret i33 %B
+}
+
+define i49 @test2(i49 %A) {
+ %B = shl i49 4096, 11
+ %Y = urem i49 %A, %B
+ ret i49 %Y
+}
+
+define i59 @test3(i59 %X, i1 %C) {
+ %V = select i1 %C, i59 70368744177664, i59 4096
+ %R = urem i59 %X, %V
+ ret i59 %R
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-rem2.ll b/llvm/test/Transforms/InstCombine/apint-rem2.ll
new file mode 100644
index 00000000000..9bfc4cde952
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-rem2.ll
@@ -0,0 +1,22 @@
+; This test makes sure that these instructions are properly eliminated.
+; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
+;
+; RUN: opt < %s -instcombine -S | not grep rem
+
+
+define i333 @test1(i333 %A) {
+ %B = urem i333 %A, 70368744177664
+ ret i333 %B
+}
+
+define i499 @test2(i499 %A) {
+ %B = shl i499 4096, 111
+ %Y = urem i499 %A, %B
+ ret i499 %Y
+}
+
+define i599 @test3(i599 %X, i1 %C) {
+ %V = select i1 %C, i599 70368744177664, i599 4096
+ %R = urem i599 %X, %V
+ ret i599 %R
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-select.ll b/llvm/test/Transforms/InstCombine/apint-select.ll
new file mode 100644
index 00000000000..0613d437085
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-select.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; No selects should remain, except where noted below.
+
+define i41 @zext(i1 %C) {
+; CHECK-LABEL: @zext(
+; CHECK-NEXT: [[V:%.*]] = zext i1 %C to i41
+; CHECK-NEXT: ret i41 [[V]]
+;
+ %V = select i1 %C, i41 1, i41 0
+ ret i41 %V
+}
+
+define i41 @sext(i1 %C) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT: [[V:%.*]] = sext i1 %C to i41
+; CHECK-NEXT: ret i41 [[V]]
+;
+ %V = select i1 %C, i41 -1, i41 0
+ ret i41 %V
+}
+
+define i999 @not_zext(i1 %C) {
+; CHECK-LABEL: @not_zext(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT: [[V:%.*]] = zext i1 [[NOT_C]] to i999
+; CHECK-NEXT: ret i999 [[V]]
+;
+ %V = select i1 %C, i999 0, i999 1
+ ret i999 %V
+}
+
+define i999 @not_sext(i1 %C) {
+; CHECK-LABEL: @not_sext(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT: [[V:%.*]] = sext i1 [[NOT_C]] to i999
+; CHECK-NEXT: ret i999 [[V]]
+;
+ %V = select i1 %C, i999 0, i999 -1
+ ret i999 %V
+}
+
+; Vector selects of vector splat constants match APInt too.
+
+define <2 x i41> @zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @zext_vec(
+; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> %C to <2 x i41>
+; CHECK-NEXT: ret <2 x i41> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i41> <i41 1, i41 1>, <2 x i41> <i41 0, i41 0>
+ ret <2 x i41> %V
+}
+
+define <2 x i32> @sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> %C to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %V
+}
+
+define <2 x i999> @not_zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_zext_vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> [[NOT_C]] to <2 x i999>
+; CHECK-NEXT: ret <2 x i999> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i999> <i999 0, i999 0>, <2 x i999> <i999 1, i999 1>
+ ret <2 x i999> %V
+}
+
+define <2 x i64> @not_sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_sext_vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> [[NOT_C]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 -1, i64 -1>
+ ret <2 x i64> %V
+}
+
+; But don't touch this - we would need 3 instructions to extend and splat the scalar select condition.
+
+define <2 x i32> @scalar_select_of_vectors(i1 %c) {
+; CHECK-LABEL: @scalar_select_of_vectors(
+; CHECK-NEXT: [[V:%.*]] = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %V = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+ ret <2 x i32> %V
+}
+
+;; (x <s 0) ? -1 : 0 -> ashr x, bitwidth - 1
+
+define i41 @test3(i41 %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i41 %X, 40
+; CHECK-NEXT: ret i41 [[X_LOBIT]]
+;
+ %t = icmp slt i41 %X, 0
+ %V = select i1 %t, i41 -1, i41 0
+ ret i41 %V
+}
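+
+; Sketch: an arithmetic shift right by bitwidth - 1 broadcasts the sign bit, so for
+; i41 the shift amount is 40: negative inputs become -1 and everything else becomes
+; 0, exactly the select above.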
+
+;; (x <s 0) ? -1 : 0 -> ashr x, bitwidth - 1
+
+define i1023 @test4(i1023 %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i1023 %X, 1022
+; CHECK-NEXT: ret i1023 [[X_LOBIT]]
+;
+ %t = icmp slt i1023 %X, 0
+ %V = select i1 %t, i1023 -1, i1023 0
+ ret i1023 %V
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-shift-simplify.ll b/llvm/test/Transforms/InstCombine/apint-shift-simplify.ll
new file mode 100644
index 00000000000..63703ba112a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i41 @test0(i41 %A, i41 %B, i41 %C) {
+ %X = shl i41 %A, %C
+ %Y = shl i41 %B, %C
+ %Z = and i41 %X, %Y
+ ret i41 %Z
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: and i41 %A, %B
+; CHECK-NEXT: shl i41
+; CHECK-NEXT: ret
+}
+
+define i57 @test1(i57 %A, i57 %B, i57 %C) {
+ %X = lshr i57 %A, %C
+ %Y = lshr i57 %B, %C
+ %Z = or i57 %X, %Y
+ ret i57 %Z
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: or i57 %A, %B
+; CHECK-NEXT: lshr i57
+; CHECK-NEXT: ret
+}
+
+define i49 @test2(i49 %A, i49 %B, i49 %C) {
+ %X = ashr i49 %A, %C
+ %Y = ashr i49 %B, %C
+ %Z = xor i49 %X, %Y
+ ret i49 %Z
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: xor i49 %A, %B
+; CHECK-NEXT: ashr i49
+; CHECK-NEXT: ret
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll
new file mode 100644
index 00000000000..495d9d6d8b2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -0,0 +1,551 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i55 @test6(i55 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C:%.*]] = mul i55 [[A:%.*]], 6
+; CHECK-NEXT: ret i55 [[C]]
+;
+ %B = shl i55 %A, 1
+ %C = mul i55 %B, 3
+ ret i55 %C
+}
+
+; (X * C2) << C1 --> X * (C2 << C1)
+
+define i55 @test6a(i55 %A) {
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT: [[C:%.*]] = mul i55 [[A:%.*]], 6
+; CHECK-NEXT: ret i55 [[C]]
+;
+ %B = mul i55 %A, 3
+ %C = shl i55 %B, 1
+ ret i55 %C
+}
+
+; (X * C2) << C1 --> X * (C2 << C1)
+
+define <2 x i55> @test6a_vec(<2 x i55> %A) {
+; CHECK-LABEL: @test6a_vec(
+; CHECK-NEXT: [[C:%.*]] = mul <2 x i55> [[A:%.*]], <i55 6, i55 48>
+; CHECK-NEXT: ret <2 x i55> [[C]]
+;
+ %B = mul <2 x i55> %A, <i55 3, i55 12>
+ %C = shl <2 x i55> %B, <i55 1, i55 2>
+ ret <2 x i55> %C
+}
+
+define i29 @test7(i8 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i29 -1
+;
+ %A = zext i8 %X to i29
+ %B = ashr i29 -1, %A
+ ret i29 %B
+}
+
+define i7 @test8(i7 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i7 0
+;
+ %B = shl i7 %A, 4
+ %C = shl i7 %B, 3
+ ret i7 %C
+}
+
+define i17 @test9(i17 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[B:%.*]] = and i17 [[A:%.*]], 1
+; CHECK-NEXT: ret i17 [[B]]
+;
+ %B = shl i17 %A, 16
+ %C = lshr i17 %B, 16
+ ret i17 %C
+}
+
+; shl (lshr X, C), C --> and X, C'
+
+define i19 @test10(i19 %X) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[SH1:%.*]] = and i19 [[X:%.*]], -262144
+; CHECK-NEXT: ret i19 [[SH1]]
+;
+ %sh1 = lshr i19 %X, 18
+ %sh2 = shl i19 %sh1, 18
+ ret i19 %sh2
+}
+
+; Two right shifts in the same direction:
+; lshr (lshr X, C1), C2 --> lshr X, C1 + C2
+
+define <2 x i19> @lshr_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @lshr_lshr_splat_vec(
+; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], <i19 5, i19 5>
+; CHECK-NEXT: ret <2 x i19> [[SH1]]
+;
+ %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+ %sh2 = lshr <2 x i19> %sh1, <i19 2, i19 2>
+ ret <2 x i19> %sh2
+}
+
+define i9 @multiuse_lshr_lshr(i9 %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr(
+; CHECK-NEXT: [[SH1:%.*]] = lshr i9 [[X:%.*]], 2
+; CHECK-NEXT: [[SH2:%.*]] = lshr i9 [[X]], 5
+; CHECK-NEXT: [[MUL:%.*]] = mul i9 [[SH1]], [[SH2]]
+; CHECK-NEXT: ret i9 [[MUL]]
+;
+ %sh1 = lshr i9 %x, 2
+ %sh2 = lshr i9 %sh1, 3
+ %mul = mul i9 %sh1, %sh2
+ ret i9 %mul
+}
+
+define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr_splat(
+; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], <i9 2, i9 2>
+; CHECK-NEXT: [[SH2:%.*]] = lshr <2 x i9> [[X]], <i9 5, i9 5>
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i9> [[SH1]], [[SH2]]
+; CHECK-NEXT: ret <2 x i9> [[MUL]]
+;
+ %sh1 = lshr <2 x i9> %x, <i9 2, i9 2>
+ %sh2 = lshr <2 x i9> %sh1, <i9 3, i9 3>
+ %mul = mul <2 x i9> %sh1, %sh2
+ ret <2 x i9> %mul
+}
+
+; Two left shifts in the same direction:
+; shl (shl X, C1), C2 --> shl X, C1 + C2
+
+define <2 x i19> @shl_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @shl_shl_splat_vec(
+; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], <i19 5, i19 5>
+; CHECK-NEXT: ret <2 x i19> [[SH1]]
+;
+ %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+ %sh2 = shl <2 x i19> %sh1, <i19 2, i19 2>
+ ret <2 x i19> %sh2
+}
+
+define i42 @multiuse_shl_shl(i42 %x) {
+; CHECK-LABEL: @multiuse_shl_shl(
+; CHECK-NEXT: [[SH1:%.*]] = shl i42 [[X:%.*]], 8
+; CHECK-NEXT: [[SH2:%.*]] = shl i42 [[X]], 17
+; CHECK-NEXT: [[MUL:%.*]] = mul i42 [[SH1]], [[SH2]]
+; CHECK-NEXT: ret i42 [[MUL]]
+;
+ %sh1 = shl i42 %x, 8
+ %sh2 = shl i42 %sh1, 9
+ %mul = mul i42 %sh1, %sh2
+ ret i42 %mul
+}
+
+define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) {
+; CHECK-LABEL: @multiuse_shl_shl_splat(
+; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], <i42 8, i42 8>
+; CHECK-NEXT: [[SH2:%.*]] = shl <2 x i42> [[X]], <i42 17, i42 17>
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i42> [[SH1]], [[SH2]]
+; CHECK-NEXT: ret <2 x i42> [[MUL]]
+;
+ %sh1 = shl <2 x i42> %x, <i42 8, i42 8>
+ %sh2 = shl <2 x i42> %sh1, <i42 9, i42 9>
+ %mul = mul <2 x i42> %sh1, %sh2
+ ret <2 x i42> %mul
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; lshr (shl X, C), C --> and X, C'
+
+define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_shl_lshr_splat_vec(
+; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], <i19 65535, i19 65535>
+; CHECK-NEXT: ret <2 x i19> [[SH1]]
+;
+ %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+ %sh2 = lshr <2 x i19> %sh1, <i19 3, i19 3>
+ ret <2 x i19> %sh2
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; shl (lshr X, C), C --> and X, C'
+
+define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_lshr_shl_splat_vec(
+; CHECK-NEXT: [[SH1:%.*]] = and <2 x i19> [[X:%.*]], <i19 -8, i19 -8>
+; CHECK-NEXT: ret <2 x i19> [[SH1]]
+;
+ %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+ %sh2 = shl <2 x i19> %sh1, <i19 3, i19 3>
+ ret <2 x i19> %sh2
+}
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; shl (lshr X, C1), C2 --> lshr X, C1 - C2
+
+define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @lshr_shl_splat_vec(
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i7> [[X:%.*]], <i7 -8, i7 -8>
+; CHECK-NEXT: [[SH1:%.*]] = lshr exact <2 x i7> [[MUL]], <i7 1, i7 1>
+; CHECK-NEXT: ret <2 x i7> [[SH1]]
+;
+ %mul = mul <2 x i7> %X, <i7 -8, i7 -8>
+ %sh1 = lshr exact <2 x i7> %mul, <i7 3, i7 3>
+ %sh2 = shl nuw nsw <2 x i7> %sh1, <i7 2, i7 2>
+ ret <2 x i7> %sh2
+}
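+
+; Numeric illustration for the i7 case above: X = 3 gives %mul = -24, i.e. 104
+; unsigned, whose low three bits are zero; 104 lshr 3 = 13 and 13 shl 2 = 52,
+; which is the same as the single lshr of 104 by 1.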
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; lshr (shl X, C1), C2 --> shl X, C1 - C2
+
+define <2 x i7> @shl_lshr_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @shl_lshr_splat_vec(
+; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], <i7 9, i7 9>
+; CHECK-NEXT: [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], <i7 1, i7 1>
+; CHECK-NEXT: ret <2 x i7> [[SH1]]
+;
+ %div = udiv <2 x i7> %X, <i7 9, i7 9>
+ %sh1 = shl nuw <2 x i7> %div, <i7 3, i7 3>
+ %sh2 = lshr exact <2 x i7> %sh1, <i7 2, i7 2>
+ ret <2 x i7> %sh2
+}
+
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+
+define i23 @test11(i23 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[A:%.*]] = mul i23 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = lshr i23 [[A]], 11
+; CHECK-NEXT: [[C:%.*]] = shl i23 [[B]], 12
+; CHECK-NEXT: ret i23 [[C]]
+;
+ %a = mul i23 %x, 3
+ %b = lshr i23 %a, 11
+ %c = shl i23 %b, 12
+ ret i23 %c
+}
+
+; shl (ashr X, C), C --> and X, C'
+
+define i47 @test12(i47 %X) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP1:%.*]] = and i47 [[X:%.*]], -256
+; CHECK-NEXT: ret i47 [[TMP1]]
+;
+ %sh1 = ashr i47 %X, 8
+ %sh2 = shl i47 %sh1, 8
+ ret i47 %sh2
+}
+
+define <2 x i47> @test12_splat_vec(<2 x i47> %X) {
+; CHECK-LABEL: @test12_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i47> [[X:%.*]], <i47 -256, i47 -256>
+; CHECK-NEXT: ret <2 x i47> [[TMP1]]
+;
+ %sh1 = ashr <2 x i47> %X, <i47 8, i47 8>
+ %sh2 = shl <2 x i47> %sh1, <i47 8, i47 8>
+ ret <2 x i47> %sh2
+}
+
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+
+define i18 @test13(i18 %x) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[A:%.*]] = mul i18 [[X:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i18 [[A]], 8
+; CHECK-NEXT: [[C:%.*]] = shl i18 [[TMP1]], 9
+; CHECK-NEXT: ret i18 [[C]]
+;
+ %a = mul i18 %x, 3
+ %b = ashr i18 %a, 8
+ %c = shl i18 %b, 9
+ ret i18 %c
+}
+
+define i35 @test14(i35 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[B:%.*]] = and i35 [[A:%.*]], -19760
+; CHECK-NEXT: [[C:%.*]] = or i35 [[B]], 19744
+; CHECK-NEXT: ret i35 [[C]]
+;
+ %B = lshr i35 %A, 4
+ %C = or i35 %B, 1234
+ %D = shl i35 %C, 4
+ ret i35 %D
+}
+
+define i79 @test14a(i79 %A) {
+; CHECK-LABEL: @test14a(
+; CHECK-NEXT: [[C:%.*]] = and i79 [[A:%.*]], 77
+; CHECK-NEXT: ret i79 [[C]]
+;
+ %B = shl i79 %A, 4
+ %C = and i79 %B, 1234
+ %D = lshr i79 %C, 4
+ ret i79 %D
+}
+
+define i45 @test15(i1 %C) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[A:%.*]] = select i1 [[C:%.*]], i45 12, i45 4
+; CHECK-NEXT: ret i45 [[A]]
+;
+ %A = select i1 %C, i45 3, i45 1
+ %V = shl i45 %A, 2
+ ret i45 %V
+}
+
+define i53 @test15a(i1 %X) {
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[X:%.*]], i53 512, i53 128
+; CHECK-NEXT: ret i53 [[V]]
+;
+ %A = select i1 %X, i8 3, i8 1
+ %B = zext i8 %A to i53
+ %V = shl i53 64, %B
+ ret i53 %V
+}
+
+define i1 @test16(i84 %X) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[AND:%.*]] = and i84 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i84 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i84 %X, 4
+ %and = and i84 %shr, 1
+ %cmp = icmp ne i84 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test16vec(<2 x i84> %X) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], <i84 16, i84 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shr = ashr <2 x i84> %X, <i84 4, i84 4>
+ %and = and <2 x i84> %shr, <i84 1, i84 1>
+ %cmp = icmp ne <2 x i84> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test17(i106 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i106 [[A:%.*]], -8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i106 [[B_MASK]], 9872
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = lshr i106 %A, 3
+ %C = icmp eq i106 %B, 1234
+ ret i1 %C
+}
+
+define <2 x i1> @test17vec(<2 x i106> %A) {
+; CHECK-LABEL: @test17vec(
+; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], <i106 -8, i106 -8>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], <i106 9872, i106 9872>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = lshr <2 x i106> %A, <i106 3, i106 3>
+ %C = icmp eq <2 x i106> %B, <i106 1234, i106 1234>
+ ret <2 x i1> %C
+}
+
+define i1 @test18(i11 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i1 false
+;
+ %B = lshr i11 %A, 10
+ %C = icmp eq i11 %B, 123
+ ret i1 %C
+}
+
+define i1 @test19(i37 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i37 [[A:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i37 %A, 2
+ %C = icmp eq i37 %B, 0
+ ret i1 %C
+}
+
+define <2 x i1> @test19vec(<2 x i37> %A) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], <i37 4, i37 4>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = ashr <2 x i37> %A, <i37 2, i37 2>
+ %C = icmp eq <2 x i37> %B, zeroinitializer
+ ret <2 x i1> %C
+}
+
+define i1 @test19a(i39 %A) {
+; CHECK-LABEL: @test19a(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i39 [[A:%.*]], -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i39 %A, 2
+ %C = icmp eq i39 %B, -1
+ ret i1 %C
+}
+
+define <2 x i1> @test19a_vec(<2 x i39> %A) {
+; CHECK-LABEL: @test19a_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], <i39 -5, i39 -5>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = ashr <2 x i39> %A, <i39 2, i39 2>
+ %C = icmp eq <2 x i39> %B, <i39 -1, i39 -1>
+ ret <2 x i1> %C
+}
+
+define i1 @test20(i13 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i1 false
+;
+ %B = ashr i13 %A, 12
+ %C = icmp eq i13 %B, 123
+ ret i1 %C
+}
+
+define i1 @test21(i12 %A) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i12 [[A:%.*]], 63
+; CHECK-NEXT: [[C:%.*]] = icmp eq i12 [[B_MASK]], 62
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i12 %A, 6
+ %C = icmp eq i12 %B, -128
+ ret i1 %C
+}
+
+define i1 @test22(i14 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i14 [[A:%.*]], 127
+; CHECK-NEXT: [[C:%.*]] = icmp eq i14 [[B_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i14 %A, 7
+ %C = icmp eq i14 %B, 0
+ ret i1 %C
+}
+
+define i11 @test23(i44 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[D:%.*]] = trunc i44 [[A:%.*]] to i11
+; CHECK-NEXT: ret i11 [[D]]
+;
+ %B = shl i44 %A, 33
+ %C = ashr i44 %B, 33
+ %D = trunc i44 %C to i11
+ ret i11 %D
+}
+
+; Fold lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define i44 @shl_lshr_eq_amt_multi_use(i44 %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use(
+; CHECK-NEXT: [[B:%.*]] = shl i44 [[A:%.*]], 33
+; CHECK-NEXT: [[C:%.*]] = and i44 [[A]], 2047
+; CHECK-NEXT: [[D:%.*]] = or i44 [[B]], [[C]]
+; CHECK-NEXT: ret i44 [[D]]
+;
+ %B = shl i44 %A, 33
+ %C = lshr i44 %B, 33
+ %D = add i44 %B, %C
+ ret i44 %D
+}
+
+; Fold vector lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define <2 x i44> @shl_lshr_eq_amt_multi_use_splat_vec(<2 x i44> %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i44> [[A:%.*]], <i44 33, i44 33>
+; CHECK-NEXT: [[C:%.*]] = and <2 x i44> [[A]], <i44 2047, i44 2047>
+; CHECK-NEXT: [[D:%.*]] = or <2 x i44> [[B]], [[C]]
+; CHECK-NEXT: ret <2 x i44> [[D]]
+;
+ %B = shl <2 x i44> %A, <i44 33, i44 33>
+ %C = lshr <2 x i44> %B, <i44 33, i44 33>
+ %D = add <2 x i44> %B, %C
+ ret <2 x i44> %D
+}
+
+; Fold shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define i43 @lshr_shl_eq_amt_multi_use(i43 %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use(
+; CHECK-NEXT: [[B:%.*]] = lshr i43 [[A:%.*]], 23
+; CHECK-NEXT: [[C:%.*]] = and i43 [[A]], -8388608
+; CHECK-NEXT: [[D:%.*]] = mul i43 [[B]], [[C]]
+; CHECK-NEXT: ret i43 [[D]]
+;
+ %B = lshr i43 %A, 23
+ %C = shl i43 %B, 23
+ %D = mul i43 %B, %C
+ ret i43 %D
+}
+
+; Fold vector shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define <2 x i43> @lshr_shl_eq_amt_multi_use_splat_vec(<2 x i43> %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = lshr <2 x i43> [[A:%.*]], <i43 23, i43 23>
+; CHECK-NEXT: [[C:%.*]] = and <2 x i43> [[A]], <i43 -8388608, i43 -8388608>
+; CHECK-NEXT: [[D:%.*]] = mul <2 x i43> [[B]], [[C]]
+; CHECK-NEXT: ret <2 x i43> [[D]]
+;
+ %B = lshr <2 x i43> %A, <i43 23, i43 23>
+ %C = shl <2 x i43> %B, <i43 23, i43 23>
+ %D = mul <2 x i43> %B, %C
+ ret <2 x i43> %D
+}
+
+define i37 @test25(i37 %tmp.2, i37 %AA) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[TMP_3:%.*]] = and i37 [[TMP_2:%.*]], -131072
+; CHECK-NEXT: [[X2:%.*]] = add i37 [[TMP_3]], [[AA:%.*]]
+; CHECK-NEXT: [[TMP_6:%.*]] = and i37 [[X2]], -131072
+; CHECK-NEXT: ret i37 [[TMP_6]]
+;
+ %x = lshr i37 %AA, 17
+ %tmp.3 = lshr i37 %tmp.2, 17
+ %tmp.5 = add i37 %tmp.3, %x
+ %tmp.6 = shl i37 %tmp.5, 17
+ ret i37 %tmp.6
+}
+
+define i40 @test26(i40 %A) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[B:%.*]] = and i40 [[A:%.*]], -2
+; CHECK-NEXT: ret i40 [[B]]
+;
+ %B = lshr i40 %A, 1
+ %C = bitcast i40 %B to i40
+ %D = shl i40 %C, 1
+ ret i40 %D
+}
+
+; OSS-Fuzz #9880
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9880
+define i177 @ossfuzz_9880(i177 %X) {
+; CHECK-LABEL: @ossfuzz_9880(
+; CHECK-NEXT: [[A:%.*]] = alloca i177, align 8
+; CHECK-NEXT: [[L1:%.*]] = load i177, i177* [[A]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i177 [[L1]], 0
+; CHECK-NEXT: [[B1:%.*]] = zext i1 [[TMP1]] to i177
+; CHECK-NEXT: ret i177 [[B1]]
+;
+ %A = alloca i177
+ %L1 = load i177, i177* %A
+ %B = or i177 0, -1
+ %B5 = udiv i177 %L1, %B
+ %B4 = add i177 %B5, %B
+ %B2 = add i177 %B, %B4
+ %B6 = mul i177 %B5, %B2
+ %B20 = shl i177 %L1, %B6
+ %B14 = sub i177 %B20, %B5
+ %B1 = udiv i177 %B14, %B6
+ ret i177 %B1
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-shl-trunc.ll b/llvm/test/Transforms/InstCombine/apint-shl-trunc.ll
new file mode 100644
index 00000000000..2241c88cb6b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-shl-trunc.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test0(i39 %X, i39 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i39 1, [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i39 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[D:%.*]] = icmp ne i39 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %B = lshr i39 %X, %A
+ %D = trunc i39 %B to i1
+ ret i1 %D
+}
+
+define i1 @test1(i799 %X, i799 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i799 1, [[A:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i799 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[D:%.*]] = icmp ne i799 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %B = lshr i799 %X, %A
+ %D = trunc i799 %B to i1
+ ret i1 %D
+}
+
+define <2 x i1> @test0vec(<2 x i39> %X, <2 x i39> %A) {
+; CHECK-LABEL: @test0vec(
+; CHECK-NEXT: [[B:%.*]] = lshr <2 x i39> [[X:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[D:%.*]] = trunc <2 x i39> [[B]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = lshr <2 x i39> %X, %A
+ %D = trunc <2 x i39> %B to <2 x i1>
+ ret <2 x i1> %D
+}
+
diff --git a/llvm/test/Transforms/InstCombine/apint-sub.ll b/llvm/test/Transforms/InstCombine/apint-sub.ll
new file mode 100644
index 00000000000..8d80f2c845b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-sub.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i23 @test1(i23 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i23 0
+;
+ %B = sub i23 %A, %A
+ ret i23 %B
+}
+
+define i47 @test2(i47 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i47 %A
+;
+ %B = sub i47 %A, 0
+ ret i47 %B
+}
+
+define i97 @test3(i97 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i97 %A
+;
+ %B = sub i97 0, %A
+ %C = sub i97 0, %B
+ ret i97 %C
+}
+
+define i108 @test4(i108 %A, i108 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[C:%.*]] = add i108 %x, %A
+; CHECK-NEXT: ret i108 [[C]]
+;
+ %B = sub i108 0, %A
+ %C = sub i108 %x, %B
+ ret i108 %C
+}
+
+define i19 @test5(i19 %A, i19 %Bok, i19 %Cok) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[D1:%.*]] = sub i19 %Cok, %Bok
+; CHECK-NEXT: [[E:%.*]] = add i19 [[D1]], %A
+; CHECK-NEXT: ret i19 [[E]]
+;
+ %D = sub i19 %Bok, %Cok
+ %E = sub i19 %A, %D
+ ret i19 %E
+}
+
+define i57 @test6(i57 %A, i57 %B) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i57 %B, -1
+; CHECK-NEXT: [[D:%.*]] = and i57 [[B_NOT]], %A
+; CHECK-NEXT: ret i57 [[D]]
+;
+ %C = and i57 %A, %B
+ %D = sub i57 %A, %C
+ ret i57 %D
+}
+
+define i77 @test7(i77 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[B:%.*]] = xor i77 %A, -1
+; CHECK-NEXT: ret i77 [[B]]
+;
+ %B = sub i77 -1, %A
+ ret i77 %B
+}
+
+define i27 @test8(i27 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[C:%.*]] = shl i27 %A, 3
+; CHECK-NEXT: ret i27 [[C]]
+;
+ %B = mul i27 9, %A
+ %C = sub i27 %B, %A
+ ret i27 %C
+}
+
+define i42 @test9(i42 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = mul i42 %A, -2
+; CHECK-NEXT: ret i42 [[C]]
+;
+ %B = mul i42 3, %A
+ %C = sub i42 %A, %B
+ ret i42 %C
+}
+
+define i1 @test11(i9 %A, i9 %B) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[CD:%.*]] = icmp ne i9 %A, %B
+; CHECK-NEXT: ret i1 [[CD]]
+;
+ %C = sub i9 %A, %B
+ %cD = icmp ne i9 %C, 0
+ ret i1 %cD
+}
+
+define i43 @test12(i43 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[C:%.*]] = lshr i43 %A, 42
+; CHECK-NEXT: ret i43 [[C]]
+;
+ %B = ashr i43 %A, 42
+ %C = sub i43 0, %B
+ ret i43 %C
+}
+
+define i79 @test13(i79 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[C:%.*]] = ashr i79 %A, 78
+; CHECK-NEXT: ret i79 [[C]]
+;
+ %B = lshr i79 %A, 78
+ %C = sub i79 0, %B
+ ret i79 %C
+}
+
+define i1024 @test14(i1024 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[D:%.*]] = ashr i1024 %A, 1023
+; CHECK-NEXT: ret i1024 [[D]]
+;
+ %B = lshr i1024 %A, 1023
+ %C = bitcast i1024 %B to i1024
+ %D = sub i1024 0, %C
+ ret i1024 %D
+}
+
+define i51 @test16(i51 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i51 %A, -1123
+; CHECK-NEXT: ret i51 [[Y]]
+;
+ %X = sdiv i51 %A, 1123
+ %Y = sub i51 0, %X
+ ret i51 %Y
+}
+
+; Can't fold subtract here because negation might overflow.
+; PR3142
+define i25 @test17(i25 %Aok) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[B:%.*]] = sub i25 0, %Aok
+; CHECK-NEXT: [[C:%.*]] = sdiv i25 [[B]], 1234
+; CHECK-NEXT: ret i25 [[C]]
+;
+ %B = sub i25 0, %Aok
+ %C = sdiv i25 %B, 1234
+ ret i25 %C
+}
+
+define i128 @test18(i128 %Y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i128 0
+;
+ %t1 = shl i128 %Y, 2
+ %t2 = shl i128 %Y, 2
+ %t3 = sub i128 %t1, %t2
+ ret i128 %t3
+}
+
+define i39 @test19(i39 %X, i39 %Y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: ret i39 %X
+;
+ %Z = sub i39 %X, %Y
+ %Q = add i39 %Z, %Y
+ ret i39 %Q
+}
+
+define i1 @test20(i33 %g, i33 %h) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[T4:%.*]] = icmp ne i33 %h, 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = sub i33 %g, %h
+ %t4 = icmp ne i33 %t2, %g
+ ret i1 %t4
+}
+
+define i1 @test21(i256 %g, i256 %h) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[T4:%.*]] = icmp ne i256 %h, 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = sub i256 %g, %h
+ %t4 = icmp ne i256 %t2, %g
+ ret i1 %t4
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-xor1.ll b/llvm/test/Transforms/InstCombine/apint-xor1.ll
new file mode 100644
index 00000000000..01cbcf158cd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-xor1.ll
@@ -0,0 +1,50 @@
+; This test makes sure that xor instructions are properly eliminated.
+; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+; RUN: opt < %s -instcombine -S | not grep "xor "
+
+
+define i47 @test1(i47 %A, i47 %B) {
+ ;; (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ %A1 = and i47 %A, 70368744177664
+ %B1 = and i47 %B, 70368744177661
+ %C1 = xor i47 %A1, %B1
+ ret i47 %C1
+}
+
+define i15 @test2(i15 %x) {
+ %tmp.2 = xor i15 %x, 0
+ ret i15 %tmp.2
+}
+
+define i23 @test3(i23 %x) {
+ %tmp.2 = xor i23 %x, %x
+ ret i23 %tmp.2
+}
+
+define i37 @test4(i37 %x) {
+ ; x ^ ~x == -1
+ %NotX = xor i37 -1, %x
+ %B = xor i37 %x, %NotX
+ ret i37 %B
+}
+
+define i7 @test5(i7 %A) {
+ ;; (A|B)^B == A & (~B)
+ %t1 = or i7 %A, 23
+ %r = xor i7 %t1, 23
+ ret i7 %r
+}
+
+define i7 @test6(i7 %A) {
+ %t1 = xor i7 %A, 23
+ %r = xor i7 %t1, 23
+ ret i7 %r
+}
+
+define i47 @test7(i47 %A) {
+ ;; (A | C1) ^ C2 -> (A | C1) & ~C2 iff (C1&C2) == C2
+ %B1 = or i47 %A, 70368744177663
+ %C1 = xor i47 %B1, 703687463
+ ret i47 %C1
+}
diff --git a/llvm/test/Transforms/InstCombine/apint-xor2.ll b/llvm/test/Transforms/InstCombine/apint-xor2.ll
new file mode 100644
index 00000000000..ab93c923815
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/apint-xor2.ll
@@ -0,0 +1,51 @@
+; This test makes sure that xor instructions are properly eliminated.
+; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
+
+; RUN: opt < %s -instcombine -S | not grep "xor "
+; END.
+
+
+define i447 @test1(i447 %A, i447 %B) {
+ ;; (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ %A1 = and i447 %A, 70368744177664
+ %B1 = and i447 %B, 70368744177663
+ %C1 = xor i447 %A1, %B1
+ ret i447 %C1
+}
+
+define i1005 @test2(i1005 %x) {
+ %tmp.2 = xor i1005 %x, 0
+ ret i1005 %tmp.2
+}
+
+define i123 @test3(i123 %x) {
+ %tmp.2 = xor i123 %x, %x
+ ret i123 %tmp.2
+}
+
+define i737 @test4(i737 %x) {
+ ; x ^ ~x == -1
+ %NotX = xor i737 -1, %x
+ %B = xor i737 %x, %NotX
+ ret i737 %B
+}
+
+define i700 @test5(i700 %A) {
+ ;; (A|B)^B == A & (~B)
+ %t1 = or i700 %A, 288230376151711743
+ %r = xor i700 %t1, 288230376151711743
+ ret i700 %r
+}
+
+define i77 @test6(i77 %A) {
+ %t1 = xor i77 %A, 23
+ %r = xor i77 %t1, 23
+ ret i77 %r
+}
+
+define i1023 @test7(i1023 %A) {
+ ;; (A | C1) ^ C2 -> (A | C1) & ~C2 iff (C1&C2) == C2
+ %B1 = or i1023 %A, 70368744177663
+ %C1 = xor i1023 %B1, 703687463
+ ret i1023 %C1
+}
diff --git a/llvm/test/Transforms/InstCombine/assoc-cast-assoc.ll b/llvm/test/Transforms/InstCombine/assoc-cast-assoc.ll
new file mode 100644
index 00000000000..c6bec13bec4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assoc-cast-assoc.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i5 @XorZextXor(i3 %a) {
+; CHECK-LABEL: @XorZextXor(
+; CHECK-NEXT: [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT: [[OP2:%.*]] = xor i5 [[CAST]], 15
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = xor i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = xor i5 %cast, 12
+ ret i5 %op2
+}
+
+define <2 x i32> @XorZextXorVec(<2 x i1> %a) {
+; CHECK-LABEL: @XorZextXorVec(
+; CHECK-NEXT: [[CAST:%.*]] = zext <2 x i1> %a to <2 x i32>
+; CHECK-NEXT: [[OP2:%.*]] = xor <2 x i32> [[CAST]], <i32 2, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = xor <2 x i1> %a, <i1 true, i1 false>
+ %cast = zext <2 x i1> %op1 to <2 x i32>
+ %op2 = xor <2 x i32> %cast, <i32 3, i32 1>
+ ret <2 x i32> %op2
+}
+
+define i5 @OrZextOr(i3 %a) {
+; CHECK-LABEL: @OrZextOr(
+; CHECK-NEXT: [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT: [[OP2:%.*]] = or i5 [[CAST]], 11
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = or i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = or i5 %cast, 8
+ ret i5 %op2
+}
+
+define <2 x i32> @OrZextOrVec(<2 x i2> %a) {
+; CHECK-LABEL: @OrZextOrVec(
+; CHECK-NEXT: [[CAST:%.*]] = zext <2 x i2> %a to <2 x i32>
+; CHECK-NEXT: [[OP2:%.*]] = or <2 x i32> [[CAST]], <i32 3, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = or <2 x i2> %a, <i2 2, i2 0>
+ %cast = zext <2 x i2> %op1 to <2 x i32>
+ %op2 = or <2 x i32> %cast, <i32 1, i32 5>
+ ret <2 x i32> %op2
+}
+
+; Unlike the rest, this case is handled by SimplifyDemandedBits / ShrinkDemandedConstant.
+
+define i5 @AndZextAnd(i3 %a) {
+; CHECK-LABEL: @AndZextAnd(
+; CHECK-NEXT: [[TMP1:%.*]] = and i3 %a, 2
+; CHECK-NEXT: [[OP2:%.*]] = zext i3 [[TMP1]] to i5
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = and i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = and i5 %cast, 14
+ ret i5 %op2
+}
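
The shrunken constant in the CHECK lines above comes from intersecting the two masks across the zext: 3 is 0b011, 14 is 0b01110, and their overlap is 0b010, so only bit 1 of %a can survive. A small illustrative sketch of the reduced form, using a hypothetical function name:

define i5 @and_zext_mask_sketch(i3 %a) {
  ; Illustrative only: 3 & 14 == 2, so the and/zext/and chain keeps just bit 1 of %a.
  %narrow = and i3 %a, 2
  %wide = zext i3 %narrow to i5
  ret i5 %wide
}
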
+
+define <2 x i32> @AndZextAndVec(<2 x i8> %a) {
+; CHECK-LABEL: @AndZextAndVec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> %a, <i8 5, i8 0>
+; CHECK-NEXT: [[OP2:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = and <2 x i8> %a, <i8 7, i8 0>
+ %cast = zext <2 x i8> %op1 to <2 x i32>
+ %op2 = and <2 x i32> %cast, <i32 261, i32 1>
+ ret <2 x i32> %op2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
new file mode 100644
index 00000000000..e803ba61774
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 63
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+ %ptrint1 = ptrtoint i32* %b to i64
+ %maskedptr2 = and i64 %ptrint1, 63
+ %maskcond3 = icmp eq i64 %maskedptr2, 0
+ tail call void @llvm.assume(i1 %maskcond3)
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32, i32* {{.*}} align 64
+; CHECK: store i32 {{.*}} align 64
+; CHECK: ret
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+ %1 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %1, 1648
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/assume-redundant.ll b/llvm/test/Transforms/InstCombine/assume-redundant.ll
new file mode 100644
index 00000000000..4bdbcc8d086
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assume-redundant.ll
@@ -0,0 +1,81 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { double* }
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
+
+; CHECK-LABEL: @_Z3fooR1s
+; CHECK: call void @llvm.assume
+; CHECK-NOT: call void @llvm.assume
+
+entry:
+ %a = getelementptr inbounds %struct.s, %struct.s* %x, i64 0, i32 0
+ %0 = load double*, double** %a, align 8
+ %ptrint = ptrtoint double* %0 to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+ tail call void @llvm.assume(i1 %maskcond)
+ %arrayidx = getelementptr inbounds double, double* %0, i64 %indvars.iv
+ %1 = load double, double* %arrayidx, align 16
+ %add = fadd double %1, 1.000000e+00
+ tail call void @llvm.assume(i1 %maskcond)
+ %mul = fmul double %add, 2.000000e+00
+ store double %mul, double* %arrayidx, align 16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ tail call void @llvm.assume(i1 %maskcond)
+ %arrayidx.1 = getelementptr inbounds double, double* %0, i64 %indvars.iv.next
+ %2 = load double, double* %arrayidx.1, align 8
+ %add.1 = fadd double %2, 1.000000e+00
+ tail call void @llvm.assume(i1 %maskcond)
+ %mul.1 = fmul double %add.1, 2.000000e+00
+ store double %mul.1, double* %arrayidx.1, align 8
+ %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
+ %exitcond.1 = icmp eq i64 %indvars.iv.next, 1599
+ br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare align 8 i8* @get()
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test1() {
+ %p = call align 8 i8* @get()
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test3() {
+ %p = alloca i8, align 8
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
new file mode 100644
index 00000000000..bc11a1f17b2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -0,0 +1,358 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo1(i32* %a) #0 {
+entry:
+ %0 = load i32, i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the assume has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+
+ ret i32 %0
+}
+
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the assume is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+ %ptrint = ptrtoint i32* %a to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+
+ %0 = load i32, i32* %a, align 4
+ ret i32 %0
+}
+
+declare void @llvm.assume(i1) #1
+
+define i32 @simple(i32 %a) #1 {
+entry:
+
+; CHECK-LABEL: @simple
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+ %cmp = icmp eq i32 %a, 4
+ tail call void @llvm.assume(i1 %cmp)
+ ret i32 %a
+}
+
+define i32 @can1(i1 %a, i1 %b, i1 %c) {
+entry:
+ %and1 = and i1 %a, %b
+ %and = and i1 %and1, %c
+ tail call void @llvm.assume(i1 %and)
+
+; CHECK-LABEL: @can1
+; CHECK: call void @llvm.assume(i1 %a)
+; CHECK: call void @llvm.assume(i1 %b)
+; CHECK: call void @llvm.assume(i1 %c)
+; CHECK: ret i32
+
+ ret i32 5
+}
+
+define i32 @can2(i1 %a, i1 %b, i1 %c) {
+entry:
+ %v = or i1 %a, %b
+ %w = xor i1 %v, 1
+ tail call void @llvm.assume(i1 %w)
+
+; CHECK-LABEL: @can2
+; CHECK: %[[V1:[^ ]+]] = xor i1 %a, true
+; CHECK: call void @llvm.assume(i1 %[[V1]])
+; CHECK: %[[V2:[^ ]+]] = xor i1 %b, true
+; CHECK: call void @llvm.assume(i1 %[[V2]])
+; CHECK: ret i32
+
+ ret i32 5
+}
+
+define i32 @bar1(i32 %a) #0 {
+entry:
+ %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ ret i32 %and1
+}
+
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ %and1 = and i32 %a, 3
+ ret i32 %and1
+}
+
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+ %and1 = and i32 %a, 3
+
+; Don't be fooled by other assumes around.
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ tail call void @llvm.assume(i1 %x)
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ tail call void @llvm.assume(i1 %y)
+
+ ret i32 %and1
+}
+
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+ %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+ %and = and i32 %a, 7
+ %cmp = icmp eq i32 %and, 1
+ tail call void @llvm.assume(i1 %cmp)
+
+ %cmp2 = icmp eq i32 %a, %b
+ tail call void @llvm.assume(i1 %cmp2)
+
+ ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+; CHECK-LABEL: @icmp1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 1
+;
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @icmp2(i32 %a) #0 {
+; CHECK-LABEL: @icmp2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %t0 = zext i1 %cmp to i32
+ %lnot.ext = xor i32 %t0, 1
+ ret i32 %lnot.ext
+}
+
+; If the 'not' of a condition is known true, then the condition must be false.
+
+define i1 @assume_not(i1 %cond) {
+; CHECK-LABEL: @assume_not(
+; CHECK-NEXT: [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[NOTCOND]])
+; CHECK-NEXT: ret i1 false
+;
+ %notcond = xor i1 %cond, true
+ call void @llvm.assume(i1 %notcond)
+ ret i1 %cond
+}
+
+declare void @escape(i32* %a)
+
+; Canonicalize a nonnull assumption on a load into metadata form.
+
+define i1 @nonnull1(i32** %a) {
+; CHECK-LABEL: @nonnull1(
+; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** %a, align 8, !nonnull !6
+; CHECK-NEXT: tail call void @escape(i32* nonnull [[LOAD]])
+; CHECK-NEXT: ret i1 false
+;
+ %load = load i32*, i32** %a
+ %cmp = icmp ne i32* %load, null
+ tail call void @llvm.assume(i1 %cmp)
+ tail call void @escape(i32* %load)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+}
+
+; Make sure the above canonicalization applies only
+; to pointer types. Doing otherwise would be illegal.
+
+define i1 @nonnull2(i32* %a) {
+; CHECK-LABEL: @nonnull2(
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* %a, align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[RVAL:%.*]] = icmp eq i32 [[LOAD]], 0
+; CHECK-NEXT: ret i1 [[RVAL]]
+;
+ %load = load i32, i32* %a
+ %cmp = icmp ne i32 %load, 0
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32 %load, 0
+ ret i1 %rval
+}
+
+; Make sure the above canonicalization does not trigger
+; if the assume is control dependent on something else
+
+define i1 @nonnull3(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** %a, align 8
+; CHECK-NEXT: br i1 %control, label %taken, label %not_taken
+; CHECK: taken:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[RVAL:%.*]] = icmp eq i32* [[LOAD]], null
+; CHECK-NEXT: ret i1 [[RVAL]]
+; CHECK: not_taken:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %load = load i32*, i32** %a
+ %cmp = icmp ne i32* %load, null
+ br i1 %control, label %taken, label %not_taken
+taken:
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+not_taken:
+ ret i1 true
+}
+
+; Make sure the above canonicalization does not trigger
+; if the path from the load to the assume is potentially
+; interrupted by an exception being thrown
+
+define i1 @nonnull4(i32** %a) {
+; CHECK-LABEL: @nonnull4(
+; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** %a, align 8
+; CHECK-NEXT: tail call void @escape(i32* [[LOAD]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: [[RVAL:%.*]] = icmp eq i32* [[LOAD]], null
+; CHECK-NEXT: ret i1 [[RVAL]]
+;
+ %load = load i32*, i32** %a
+ ;; This call may throw!
+ tail call void @escape(i32* %load)
+ %cmp = icmp ne i32* %load, null
+ tail call void @llvm.assume(i1 %cmp)
+ %rval = icmp eq i32* %load, null
+ ret i1 %rval
+}
+
+; PR35846 - https://bugs.llvm.org/show_bug.cgi?id=35846
+
+define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) {
+; CHECK-LABEL: @assumption_conflicts_with_known_bits(
+; CHECK-NEXT: tail call void @llvm.assume(i1 false)
+; CHECK-NEXT: ret i32 0
+;
+ %and1 = and i32 %b, 3
+ %B1 = lshr i32 %and1, %and1
+ %B3 = shl nuw nsw i32 %and1, %B1
+ %cmp = icmp eq i32 %B3, 1
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp2 = icmp eq i32 %B1, %B3
+ tail call void @llvm.assume(i1 %cmp2)
+ ret i32 %and1
+}
+
+; PR37726 - https://bugs.llvm.org/show_bug.cgi?id=37726
+; There's a loophole in eliminating a redundant assumption when
+; we have conflicting assumptions. Verify that debuginfo doesn't
+; get in the way of the fold.
+
+define void @debug_interference(i8 %x) {
+; CHECK-LABEL: @debug_interference(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT: tail call void @llvm.assume(i1 false)
+; CHECK-NEXT: ret void
+;
+ %cmp1 = icmp eq i8 %x, 0
+ %cmp2 = icmp ne i8 %x, 0
+ tail call void @llvm.assume(i1 %cmp1)
+ tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+ tail call void @llvm.assume(i1 %cmp1)
+ tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+ tail call void @llvm.assume(i1 %cmp2)
+ tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+ tail call void @llvm.assume(i1 %cmp2)
+ ret void
+}
+
+; This would crash.
+; Does it ever make sense to peek through a bitcast of the icmp operand?
+
+define i32 @PR40940(<4 x i8> %x) {
+; CHECK-LABEL: @PR40940(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[T2:%.*]] = bitcast <4 x i8> [[SHUF]] to i32
+; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 65536
+; CHECK-NEXT: call void @llvm.assume(i1 [[T3]])
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %shuf = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+ %t2 = bitcast <4 x i8> %shuf to i32
+ %t3 = icmp ult i32 %t2, 65536
+ call void @llvm.assume(i1 %t3)
+ ret i32 %t2
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "Me", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: null, retainedTypes: null, imports: null)
+!1 = !DILocalVariable(name: "", arg: 1, scope: !2, file: null, line: 1, type: null)
+!2 = distinct !DISubprogram(name: "debug", linkageName: "debug", scope: null, file: null, line: 0, type: null, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!3 = !DIFile(filename: "consecutive-fences.ll", directory: "")
+!5 = !{i32 2, !"Dwarf Version", i32 4}
+!6 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !{i32 1, !"wchar_size", i32 4}
+!8 = !{i32 7, !"PIC Level", i32 2}
+!9 = !DILocation(line: 0, column: 0, scope: !2)
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/assume2.ll b/llvm/test/Transforms/InstCombine/assume2.ll
new file mode 100644
index 00000000000..8dc8831fffa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assume2.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.assume(i1) #1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 5
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 5
+;
+ %and = and i32 %a, 15
+ %cmp = icmp eq i32 %and, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 10
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 2
+;
+ %and = and i32 %a, 15
+ %nand = xor i32 %and, -1
+ %cmp = icmp eq i32 %nand, 4294967285
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[V:%.*]] = or i32 [[A:%.*]], -16
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], -11
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 5
+;
+ %v = or i32 %a, 4294967280
+ %cmp = icmp eq i32 %v, 4294967285
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+define i32 @test4(i32 %a) #0 {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[V:%.*]] = or i32 [[A:%.*]], -16
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], -6
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 2
+;
+ %v = or i32 %a, 4294967280
+ %nv = xor i32 %v, -1
+ %cmp = icmp eq i32 %nv, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+define i32 @test5(i32 %a) #0 {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 4
+;
+ %v = xor i32 %a, 1
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 7
+ ret i32 %and1
+}
+
+define i32 @test6(i32 %a) #0 {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[V_MASK:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 5
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 5
+;
+ %v = shl i32 %a, 2
+ %cmp = icmp eq i32 %v, 20
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 63
+ ret i32 %and1
+}
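
The constants 1073741823 and 5 in the CHECK lines above follow from undoing the shift in the assumed condition: (%a << 2) == 20 says nothing about the top two bits that the shift discards, so it only pins down the low 30 bits, giving (%a & (2^30 - 1)) == (20 >> 2). A minimal illustrative sketch, with a hypothetical function name:

declare void @llvm.assume(i1)

define i32 @assume_shl_sketch(i32 %a) {
  ; Illustrative restatement of the assumed fact: 1073741823 == 2^30 - 1 and 5 == 20 >> 2.
  %low = and i32 %a, 1073741823
  %cmp = icmp eq i32 %low, 5
  call void @llvm.assume(i1 %cmp)
  ; %a & 63 reads only bits the assumption fixes, hence the constant `ret i32 5` above.
  %known = and i32 %a, 63
  ret i32 %known
}
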
+
+define i32 @test7(i32 %a) #0 {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[V_MASK:%.*]] = and i32 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 20
+;
+ %v = lshr i32 %a, 2
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 252
+ ret i32 %and1
+}
+
+define i32 @test8(i32 %a) #0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[V_MASK:%.*]] = and i32 [[A:%.*]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 20
+;
+ %v = lshr i32 %a, 2
+ %cmp = icmp eq i32 %v, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 252
+ ret i32 %and1
+}
+
+define i32 @test9(i32 %a) #0 {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = icmp sgt i32 %a, 5
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 2147483648
+ ret i32 %and1
+}
+
+define i32 @test10(i32 %a) #0 {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 -2147483648
+;
+ %cmp = icmp sle i32 %a, -2
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 2147483648
+ ret i32 %and1
+}
+
+define i32 @test11(i32 %a) #0 {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 257
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = icmp ule i32 %a, 256
+ tail call void @llvm.assume(i1 %cmp)
+ %and1 = and i32 %a, 3072
+ ret i32 %and1
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll
new file mode 100644
index 00000000000..0f9752657fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/atomic.ll
@@ -0,0 +1,333 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Check transforms involving atomic operations
+
+define i32 @test1(i32* %p) {
+; CHECK-LABEL: define i32 @test1(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: shl i32 %x, 1
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+define i32 @test2(i32* %p) {
+; CHECK-LABEL: define i32 @test2(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+; CHECK: %y = load volatile i32, i32* %p, align 4
+ %x = load volatile i32, i32* %p, align 4
+ %y = load volatile i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; The exact semantics of mixing volatile and non-volatile on the same
+; memory location are a bit unclear, but conservatively, we know we don't
+; want to remove the volatile.
+define i32 @test3(i32* %p) {
+; CHECK-LABEL: define i32 @test3(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+ %x = load volatile i32, i32* %p, align 4
+ %y = load i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; Forwarding from a stronger ordered atomic is fine
+define i32 @test4(i32* %p) {
+; CHECK-LABEL: define i32 @test4(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: shl i32 %x, 1
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p unordered, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; Forwarding from a non-atomic is not. (The earlier load
+; could in principle be promoted to atomic and then forwarded,
+; but we can't just drop the atomic from the load.)
+define i32 @test5(i32* %p) {
+; CHECK-LABEL: define i32 @test5(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+ %x = load atomic i32, i32* %p unordered, align 4
+ %y = load i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; Forwarding atomic to atomic is fine
+define i32 @test6(i32* %p) {
+; CHECK-LABEL: define i32 @test6(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+; CHECK: shl i32 %x, 1
+ %x = load atomic i32, i32* %p unordered, align 4
+ %y = load atomic i32, i32* %p unordered, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; FIXME: we currently don't do anything for monotonic
+define i32 @test7(i32* %p) {
+; CHECK-LABEL: define i32 @test7(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p monotonic, align 4
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p monotonic, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; FIXME: We could forward in racy code
+define i32 @test8(i32* %p) {
+; CHECK-LABEL: define i32 @test8(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p acquire, align 4
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p acquire, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; An unordered access to null is still unreachable. There's no
+; ordering imposed.
+define i32 @test9() {
+; CHECK-LABEL: define i32 @test9(
+; CHECK: store i32 undef, i32* null
+ %x = load atomic i32, i32* null unordered, align 4
+ ret i32 %x
+}
+
+define i32 @test9_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test9_no_null_opt(
+; CHECK: load atomic i32, i32* null unordered
+ %x = load atomic i32, i32* null unordered, align 4
+ ret i32 %x
+}
+
+; FIXME: Could also fold
+define i32 @test10() {
+; CHECK-LABEL: define i32 @test10(
+; CHECK: load atomic i32, i32* null monotonic
+ %x = load atomic i32, i32* null monotonic, align 4
+ ret i32 %x
+}
+
+define i32 @test10_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test10_no_null_opt(
+; CHECK: load atomic i32, i32* null monotonic
+ %x = load atomic i32, i32* null monotonic, align 4
+ ret i32 %x
+}
+
+; Would this be legal to fold? Probably?
+define i32 @test11() {
+; CHECK-LABEL: define i32 @test11(
+; CHECK: load atomic i32, i32* null seq_cst
+ %x = load atomic i32, i32* null seq_cst, align 4
+ ret i32 %x
+}
+
+define i32 @test11_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test11_no_null_opt(
+; CHECK: load atomic i32, i32* null seq_cst
+ %x = load atomic i32, i32* null seq_cst, align 4
+ ret i32 %x
+}
+
+; An unordered access to null is still unreachable. There's no
+; ordering imposed.
+define i32 @test12() {
+; CHECK-LABEL: define i32 @test12(
+; CHECK: store atomic i32 undef, i32* null
+ store atomic i32 0, i32* null unordered, align 4
+ ret i32 0
+}
+
+define i32 @test12_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test12_no_null_opt(
+; CHECK: store atomic i32 0, i32* null unordered
+ store atomic i32 0, i32* null unordered, align 4
+ ret i32 0
+}
+
+; FIXME: Could also fold
+define i32 @test13() {
+; CHECK-LABEL: define i32 @test13(
+; CHECK: store atomic i32 0, i32* null monotonic
+ store atomic i32 0, i32* null monotonic, align 4
+ ret i32 0
+}
+
+define i32 @test13_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test13_no_null_opt(
+; CHECK: store atomic i32 0, i32* null monotonic
+ store atomic i32 0, i32* null monotonic, align 4
+ ret i32 0
+}
+
+; Would this be legal to fold? Probably?
+define i32 @test14() {
+; CHECK-LABEL: define i32 @test14(
+; CHECK: store atomic i32 0, i32* null seq_cst
+ store atomic i32 0, i32* null seq_cst, align 4
+ ret i32 0
+}
+
+define i32 @test14_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test14_no_null_opt(
+; CHECK: store atomic i32 0, i32* null seq_cst
+ store atomic i32 0, i32* null seq_cst, align 4
+ ret i32 0
+}
+
+@a = external global i32
+@b = external global i32
+
+define i32 @test15(i1 %cnd) {
+; CHECK-LABEL: define i32 @test15(
+; CHECK: load atomic i32, i32* @a unordered, align 4
+; CHECK: load atomic i32, i32* @b unordered, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr unordered, align 4
+ ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test16(i1 %cnd) {
+; CHECK-LABEL: define i32 @test16(
+; CHECK: load atomic i32, i32* %addr monotonic, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr monotonic, align 4
+ ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test17(i1 %cnd) {
+; CHECK-LABEL: define i32 @test17(
+; CHECK: load atomic i32, i32* %addr seq_cst, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr seq_cst, align 4
+ ret i32 %x
+}
+
+define i32 @test22(i1 %cnd) {
+; CHECK-LABEL: define i32 @test22(
+; CHECK: [[PHI:%.*]] = phi i32
+; CHECK: store atomic i32 [[PHI]], i32* @a unordered, align 4
+ br i1 %cnd, label %block1, label %block2
+
+block1:
+ store atomic i32 1, i32* @a unordered, align 4
+ br label %merge
+block2:
+ store atomic i32 2, i32* @a unordered, align 4
+ br label %merge
+
+merge:
+ ret i32 0
+}
+
+; TODO: probably also legal here
+define i32 @test23(i1 %cnd) {
+; CHECK-LABEL: define i32 @test23(
+; CHECK: br i1 %cnd, label %block1, label %block2
+ br i1 %cnd, label %block1, label %block2
+
+block1:
+ store atomic i32 1, i32* @a monotonic, align 4
+ br label %merge
+block2:
+ store atomic i32 2, i32* @a monotonic, align 4
+ br label %merge
+
+merge:
+ ret i32 0
+}
+
+declare void @clobber()
+
+define i32 @test18(float* %p) {
+; CHECK-LABEL: define i32 @test18(
+; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4
+; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4
+ %x = load atomic float, float* %p unordered, align 4
+ call void @clobber() ;; keep the load around
+ store atomic float %x, float* %p unordered, align 4
+ ret i32 0
+}
+
+; TODO: probably also legal in this case
+define i32 @test19(float* %p) {
+; CHECK-LABEL: define i32 @test19(
+; CHECK: load atomic float, float* %p seq_cst, align 4
+; CHECK: store atomic float %x, float* %p seq_cst, align 4
+ %x = load atomic float, float* %p seq_cst, align 4
+ call void @clobber() ;; keep the load around
+ store atomic float %x, float* %p seq_cst, align 4
+ ret i32 0
+}
+
+define i32 @test20(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test20(
+; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4
+ %cast = bitcast i8* %v to i32*
+ store atomic i32* %cast, i32** %p unordered, align 4
+ ret i32 0
+}
+
+define i32 @test21(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test21(
+; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4
+ %cast = bitcast i8* %v to i32*
+ store atomic i32* %cast, i32** %p monotonic, align 4
+ ret i32 0
+}
+
+define void @pr27490a(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store volatile i64 %l1, i64* %2, align 8
+ %l = load i8*, i8** %p1
+ store volatile i8* %l, i8** %p2
+ ret void
+}
+
+define void @pr27490b(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store atomic i64 %l1, i64* %2 seq_cst, align 8
+ %l = load i8*, i8** %p1
+ store atomic i8* %l, i8** %p2 seq_cst, align 8
+ ret void
+}
+
+;; At the moment, we can't form atomic vectors by folding since these are
+;; not representable in the IR. This was PR29121. The right long-term
+;; solution is to extend the IR to handle this case.
+define <2 x float> @no_atomic_vector_load(i64* %p) {
+; CHECK-LABEL: @no_atomic_vector_load
+; CHECK: load atomic i64, i64* %p unordered, align 8
+ %load = load atomic i64, i64* %p unordered, align 8
+ %.cast = bitcast i64 %load to <2 x float>
+ ret <2 x float> %.cast
+}
+
+define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
+; CHECK-LABEL: @no_atomic_vector_store
+; CHECK: store atomic i64 %1, i64* %2 unordered, align 8
+ %1 = bitcast <2 x float> %p to i64
+ %2 = bitcast i8* %p2 to i64*
+ store atomic i64 %1, i64* %2 unordered, align 8
+ ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
new file mode 100644
index 00000000000..6b594bed33c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -0,0 +1,298 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Check that we can replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
+; This is possible when:
+; - <op> LHS, 0 == LHS
+; - the ordering of atomicrmw is compatible with a load (i.e., no release semantics)
+
+; CHECK-LABEL: atomic_add_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_add_zero(i32* %addr) {
+ %res = atomicrmw add i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: atomic_or_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_or_zero(i32* %addr) {
+  %res = atomicrmw or i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: atomic_sub_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_sub_zero(i32* %addr) {
+ %res = atomicrmw sub i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: atomic_and_allones
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_and_allones(i32* %addr) {
+ %res = atomicrmw and i32* %addr, i32 -1 monotonic
+ ret i32 %res
+}
+; CHECK-LABEL: atomic_umin_uint_max
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_umin_uint_max(i32* %addr) {
+ %res = atomicrmw umin i32* %addr, i32 -1 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: atomic_umax_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_umax_zero(i32* %addr) {
+ %res = atomicrmw umax i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: atomic_min_smax_char
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
+; CHECK-NEXT: ret i8 %res
+define i8 @atomic_min_smax_char(i8* %addr) {
+ %res = atomicrmw min i8* %addr, i8 127 monotonic
+ ret i8 %res
+}
+
+; CHECK-LABEL: atomic_max_smin_char
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
+; CHECK-NEXT: ret i8 %res
+define i8 @atomic_max_smin_char(i8* %addr) {
+ %res = atomicrmw max i8* %addr, i8 -128 monotonic
+ ret i8 %res
+}
+
+; CHECK-LABEL: atomic_fsub
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_zero(float* %addr) {
+ %res = atomicrmw fsub float* %addr, float 0.0 monotonic
+ ret float %res
+}
+
+; CHECK-LABEL: atomic_fadd
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fadd_zero(float* %addr) {
+ %res = atomicrmw fadd float* %addr, float -0.0 monotonic
+ ret float %res
+}
+
+; CHECK-LABEL: atomic_fsub_canon
+; CHECK-NEXT: %res = atomicrmw fadd float* %addr, float -0.000000e+00 release
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_canon(float* %addr) {
+ %res = atomicrmw fsub float* %addr, float 0.0 release
+ ret float %res
+}
+; CHECK-LABEL: atomic_fadd_canon
+; CHECK-NEXT: %res = atomicrmw fadd float* %addr, float -0.000000e+00 release
+; CHECK-NEXT: ret float %res
+define float @atomic_fadd_canon(float* %addr) {
+ %res = atomicrmw fadd float* %addr, float -0.0 release
+ ret float %res
+}
+
+; Can't replace a volatile with a load; this would eliminate a volatile store.
+; CHECK-LABEL: atomic_sub_zero_volatile
+; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+; CHECK-NEXT: ret i64 %res
+define i64 @atomic_sub_zero_volatile(i64* %addr) {
+ %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+ ret i64 %res
+}
+
+
+; Check that the transformation properly preserve the syncscope.
+; CHECK-LABEL: atomic_syncscope
+; CHECK-NEXT: %res = load atomic i16, i16* %addr syncscope("some_syncscope") acquire, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_syncscope(i16* %addr) {
+ %res = atomicrmw or i16* %addr, i16 0 syncscope("some_syncscope") acquire
+ ret i16 %res
+}
+
+; By eliminating the store part of the atomicrmw, we would get rid of the
+; release semantics, which is incorrect. We can canonicalize the operation.
+; CHECK-LABEL: atomic_seq_cst
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 seq_cst
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_seq_cst(i16* %addr) {
+ %res = atomicrmw add i16* %addr, i16 0 seq_cst
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (non zero constant).
+; CHECK-LABEL: atomic_add_non_zero
+; CHECK-NEXT: %res = atomicrmw add i16* %addr, i16 2 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_add_non_zero(i16* %addr) {
+ %res = atomicrmw add i16* %addr, i16 2 monotonic
+ ret i16 %res
+}
+
+; CHECK-LABEL: atomic_xor_zero
+; CHECK-NEXT: %res = load atomic i16, i16* %addr monotonic, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_xor_zero(i16* %addr) {
+ %res = atomicrmw xor i16* %addr, i16 0 monotonic
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (release). Do canonicalize.
+; CHECK-LABEL: atomic_release
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 release
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_release(i16* %addr) {
+ %res = atomicrmw sub i16* %addr, i16 0 release
+ ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (acquire, release). Do canonicalize.
+; CHECK-LABEL: atomic_acq_rel
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 acq_rel
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_acq_rel(i16* %addr) {
+ %res = atomicrmw xor i16* %addr, i16 0 acq_rel
+ ret i16 %res
+}
+
+
+; CHECK-LABEL: sat_or_allones
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_or_allones(i32* %addr) {
+ %res = atomicrmw or i32* %addr, i32 -1 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: sat_and_zero
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_and_zero(i32* %addr) {
+ %res = atomicrmw and i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+; CHECK-LABEL: sat_umin_uint_min
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_umin_uint_min(i32* %addr) {
+ %res = atomicrmw umin i32* %addr, i32 0 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: sat_umax_uint_max
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_umax_uint_max(i32* %addr) {
+ %res = atomicrmw umax i32* %addr, i32 -1 monotonic
+ ret i32 %res
+}
+
+; CHECK-LABEL: sat_min_smin_char
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 -128 monotonic
+; CHECK-NEXT: ret i8 %res
+define i8 @sat_min_smin_char(i8* %addr) {
+ %res = atomicrmw min i8* %addr, i8 -128 monotonic
+ ret i8 %res
+}
+
+; CHECK-LABEL: sat_max_smax_char
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 127 monotonic
+; CHECK-NEXT: ret i8 %res
+define i8 @sat_max_smax_char(i8* %addr) {
+ %res = atomicrmw max i8* %addr, i8 127 monotonic
+ ret i8 %res
+}
+
+; CHECK-LABEL: sat_fadd_nan
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0x7FF00000FFFFFFFF release
+; CHECK-NEXT: ret double %res
+define double @sat_fadd_nan(double* %addr) {
+ %res = atomicrmw fadd double* %addr, double 0x7FF00000FFFFFFFF release
+ ret double %res
+}
+
+; CHECK-LABEL: sat_fsub_nan
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0x7FF00000FFFFFFFF release
+; CHECK-NEXT: ret double %res
+define double @sat_fsub_nan(double* %addr) {
+ %res = atomicrmw fsub double* %addr, double 0x7FF00000FFFFFFFF release
+ ret double %res
+}
+
+; CHECK-LABEL: sat_fsub_nan_unused
+; CHECK-NEXT: store atomic double 0x7FF00000FFFFFFFF, double* %addr monotonic, align 8
+; CHECK-NEXT: ret void
+define void @sat_fsub_nan_unused(double* %addr) {
+ atomicrmw fsub double* %addr, double 0x7FF00000FFFFFFFF monotonic
+ ret void
+}
+
+; CHECK-LABEL: xchg_unused_monotonic
+; CHECK-NEXT: store atomic i32 0, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @xchg_unused_monotonic(i32* %addr) {
+ atomicrmw xchg i32* %addr, i32 0 monotonic
+ ret void
+}
+
+; CHECK-LABEL: xchg_unused_release
+; CHECK-NEXT: store atomic i32 -1, i32* %addr release, align 4
+; CHECK-NEXT: ret void
+define void @xchg_unused_release(i32* %addr) {
+ atomicrmw xchg i32* %addr, i32 -1 release
+ ret void
+}
+
+; CHECK-LABEL: xchg_unused_seq_cst
+; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 0 seq_cst
+; CHECK-NEXT: ret void
+define void @xchg_unused_seq_cst(i32* %addr) {
+ atomicrmw xchg i32* %addr, i32 0 seq_cst
+ ret void
+}
+
+; CHECK-LABEL: xchg_unused_volatile
+; CHECK-NEXT: atomicrmw volatile xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret void
+define void @xchg_unused_volatile(i32* %addr) {
+ atomicrmw volatile xchg i32* %addr, i32 0 monotonic
+ ret void
+}
+
+; CHECK-LABEL: sat_or_allones_unused
+; CHECK-NEXT: store atomic i32 -1, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @sat_or_allones_unused(i32* %addr) {
+ atomicrmw or i32* %addr, i32 -1 monotonic
+ ret void
+}
+
+
+; CHECK-LABEL: undef_operand_unused
+; CHECK-NEXT: atomicrmw or i32* %addr, i32 undef monotonic
+; CHECK-NEXT: ret void
+define void @undef_operand_unused(i32* %addr) {
+ atomicrmw or i32* %addr, i32 undef monotonic
+ ret void
+}
+
+; CHECK-LABEL: undef_operand_used
+; CHECK-NEXT: %res = atomicrmw or i32* %addr, i32 undef monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @undef_operand_used(i32* %addr) {
+ %res = atomicrmw or i32* %addr, i32 undef monotonic
+ ret i32 %res
+}
+
+
+
diff --git a/llvm/test/Transforms/InstCombine/badmalloc.ll b/llvm/test/Transforms/InstCombine/badmalloc.ll
new file mode 100644
index 00000000000..2074d262ccb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/badmalloc.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+declare noalias i8* @malloc(i64) nounwind
+declare void @free(i8*)
+
+; PR5130
+define i1 @test1() {
+ %A = call noalias i8* @malloc(i64 4) nounwind
+ %B = icmp eq i8* %A, null
+ store i8 0, i8* %A
+
+ call void @free(i8* %A)
+ ret i1 %B
+
+; CHECK-LABEL: @test1(
+; CHECK: ret i1 false
+}
+
+; CHECK-LABEL: @test2(
+define noalias i8* @test2() nounwind {
+entry:
+; CHECK: @malloc
+ %A = call noalias i8* @malloc(i64 4) nounwind
+; CHECK: icmp eq
+ %tobool = icmp eq i8* %A, null
+; CHECK: br i1
+ br i1 %tobool, label %return, label %if.end
+
+if.end:
+; CHECK: store
+ store i8 7, i8* %A
+ br label %return
+
+return:
+; CHECK: phi
+ %retval.0 = phi i8* [ %A, %if.end ], [ null, %entry ]
+ ret i8* %retval.0
+}
diff --git a/llvm/test/Transforms/InstCombine/binop-cast.ll b/llvm/test/Transforms/InstCombine/binop-cast.ll
new file mode 100644
index 00000000000..3dbca7ef148
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/binop-cast.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @testAdd(i32 %X, i32 %Y) {
+ %tmp = add i32 %X, %Y
+; CHECK: %tmp = add i32 %X, %Y
+ %tmp.l = bitcast i32 %tmp to i32
+ ret i32 %tmp.l
+; CHECK: ret i32 %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll
new file mode 100644
index 00000000000..1ecd305e807
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bit-checks.ll
@@ -0,0 +1,647 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @main1(i32 %argc) {
+; CHECK-LABEL: @main1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %and = and i32 %argc, 1
+ %tobool = icmp ne i32 %and, 0
+ %and2 = and i32 %argc, 2
+ %tobool3 = icmp ne i32 %and2, 0
+ %or.cond = and i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main2(i32 %argc) {
+; CHECK-LABEL: @main2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 3
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 1
+ %tobool = icmp eq i32 %and, 0
+ %and2 = and i32 %argc, 2
+ %tobool3 = icmp eq i32 %and2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; Tests that check combining (icmp eq (A & B), C) & (icmp eq (A & D), E), and that
+; (icmp eq (A & B), 0) is treated like (icmp eq (A & B), B) when B is a
+; single-bit constant.
+
+; (icmp eq (A & B), 0) & (icmp eq (A & D), 0) -> (icmp eq (A & (B|D)), 0)
+define i32 @main3(i32 %argc) {
+; CHECK-LABEL: @main3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 0
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp eq i32 %and2, 0
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
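
The merged constant 55 in the CHECK lines above is the union of the two masks being tested: 7 | 48 == 55, and the masks are disjoint, so the two zero-tests collapse into a single test. A small illustrative sketch of the merged form, using a hypothetical function name:

define i32 @merged_mask_sketch(i32 %argc) {
  ; Illustrative only: both original zero-tests ask whether any bit of 7 | 48 == 55 is set.
  %masked = and i32 %argc, 55
  %any = icmp ne i32 %masked, 0
  %r = zext i1 %any to i32
  ret i32 %r
}
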
+
+define i32 @main3b(i32 %argc) {
+; CHECK-LABEL: @main3b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 0
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp ne i32 %and2, 16
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main3e_like(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp eq i32 %and, 0
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp eq i32 %and2, 0
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp ne (A & B), 0) | (icmp ne (A & D), 0) -> (icmp ne (A & (B|D)), 0)
+define i32 @main3c(i32 %argc) {
+; CHECK-LABEL: @main3c(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 0
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp ne i32 %and2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main3d(i32 %argc) {
+; CHECK-LABEL: @main3d(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 0
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp eq i32 %and2, 16
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main3f_like(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp ne i32 %and, 0
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp ne i32 %and2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp eq (A & B), B) & (icmp eq (A & D), D) -> (icmp eq (A & (B|D)), (B|D))
+define i32 @main4(i32 %argc) {
+; CHECK-LABEL: @main4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 55
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 7
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp eq i32 %and2, 48
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main4b(i32 %argc) {
+; CHECK-LABEL: @main4b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 23
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 7
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp ne i32 %and2, 0
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main4e_like(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp eq i32 %and, %argc2
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp eq i32 %and2, %argc3
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp ne (A & B), B) | (icmp ne (A & D), D) -> (icmp ne (A & (B|D)), (B|D))
+define i32 @main4c(i32 %argc) {
+; CHECK-LABEL: @main4c(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 55
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 7
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp ne i32 %and2, 48
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main4d(i32 %argc) {
+; CHECK-LABEL: @main4d(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 23
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 7
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp eq i32 %and2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main4f_like(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp ne i32 %and, %argc2
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp ne i32 %and2, %argc3
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp eq (A & B), A) & (icmp eq (A & D), A) -> (icmp eq (A & (B&D)), A)
+define i32 @main5_like(i32 %argc, i32 %argc2) {
+; CHECK-LABEL: @main5_like(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, %argc2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 7
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 7
+ %and2 = and i32 %argc2, 7
+ %tobool3 = icmp eq i32 %and2, 7
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main5e_like(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], %argc
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp eq i32 %and, %argc
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp eq i32 %and2, %argc
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp ne (A & B), A) | (icmp ne (A & D), A) -> (icmp ne (A & (B&D)), A)
+define i32 @main5c_like(i32 %argc, i32 %argc2) {
+; CHECK-LABEL: @main5c_like(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, %argc2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 7
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 7
+ %and2 = and i32 %argc2, 7
+ %tobool3 = icmp ne i32 %and2, 7
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main5f_like(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP2]], %argc
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, %argc2
+ %tobool = icmp ne i32 %and, %argc
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp ne i32 %and2, %argc
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp eq (A & B), C) & (icmp eq (A & D), E) -> (icmp eq (A & (B|D)), (C|E))
+; if B, C, D, E are constants and the combination is possible
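+; In @main6 below: B = 7, C = 3, D = 48, E = 16, so B|D = 55 and C|E = 19.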
+define i32 @main6(i32 %argc) {
+; CHECK-LABEL: @main6(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 3
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp eq i32 %and2, 16
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main6b(i32 %argc) {
+; CHECK-LABEL: @main6b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp eq i32 %and, 3
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp ne i32 %and2, 0
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (icmp ne (A & B), C) | (icmp ne (A & D), E) -> (icmp ne (A & (B|D)), (C|E))
+; if B, C, D, E are constants and the combination is possible
+define i32 @main6c(i32 %argc) {
+; CHECK-LABEL: @main6c(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 3
+ %and2 = and i32 %argc, 48
+ %tobool3 = icmp ne i32 %and2, 16
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+define i32 @main6d(i32 %argc) {
+; CHECK-LABEL: @main6d(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT: [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and = and i32 %argc, 7
+ %tobool = icmp ne i32 %and, 3
+ %and2 = and i32 %argc, 16
+ %tobool3 = icmp eq i32 %and2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %storemerge = select i1 %or.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; test parameter permutations
+; (B & A) == B & (D & A) == D
+define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7a(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and1 = and i32 %argc2, %argc
+ %tobool = icmp eq i32 %and1, %argc2
+ %and2 = and i32 %argc3, %argc
+ %tobool3 = icmp eq i32 %and2, %argc3
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; B == (A & B) & D == (A & D)
+define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7b(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and1 = and i32 %argc, %argc2
+ %tobool = icmp eq i32 %argc2, %and1
+ %and2 = and i32 %argc, %argc3
+ %tobool3 = icmp eq i32 %argc3, %and2
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; B == (B & A) & D == (D & A)
+define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7c(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %and1 = and i32 %argc2, %argc
+ %tobool = icmp eq i32 %argc2, %and1
+ %and2 = and i32 %argc3, %argc
+ %tobool3 = icmp eq i32 %argc3, %and2
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (A & (B & C)) == (B & C) & (A & (D & E)) == (D & E)
+define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7d(
+; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %bc = and i32 %argc2, %argc4
+ %de = and i32 %argc3, %argc5
+ %and1 = and i32 %argc, %bc
+ %tobool = icmp eq i32 %and1, %bc
+ %and2 = and i32 %argc, %de
+ %tobool3 = icmp eq i32 %and2, %de
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; ((B & C) & A) == (B & C) & ((D & E) & A) == (D & E)
+define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7e(
+; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %bc = and i32 %argc2, %argc4
+ %de = and i32 %argc3, %argc5
+ %and1 = and i32 %bc, %argc
+ %tobool = icmp eq i32 %and1, %bc
+ %and2 = and i32 %de, %argc
+ %tobool3 = icmp eq i32 %and2, %de
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (B & C) == (A & (B & C)) & (D & E) == (A & (D & E))
+define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7f(
+; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %bc = and i32 %argc2, %argc4
+ %de = and i32 %argc3, %argc5
+ %and1 = and i32 %argc, %bc
+ %tobool = icmp eq i32 %bc, %and1
+ %and2 = and i32 %argc, %de
+ %tobool3 = icmp eq i32 %de, %and2
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
+; (B & C) == ((B & C) & A) & (D & E) == ((D & E) & A)
+define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7g(
+; CHECK-NEXT: [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT: [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT: [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %bc = and i32 %argc2, %argc4
+ %de = and i32 %argc3, %argc5
+ %and1 = and i32 %bc, %argc
+ %tobool = icmp eq i32 %bc, %and1
+ %and2 = and i32 %de, %argc
+ %tobool3 = icmp eq i32 %de, %and2
+ %and.cond = and i1 %tobool, %tobool3
+ %storemerge = select i1 %and.cond, i32 0, i32 1
+ ret i32 %storemerge
+}
+
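+; In @main8 - @main11 one of the masks comes from a sign-bit test:
+; 'icmp slt i8 (trunc %argc), 0' checks bit 7 (mask 128), so together with the
+; explicit mask 64 the combined mask is 64 | 128 = 192.
+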
+define i32 @main8(i32 %argc) {
+; CHECK-LABEL: @main8(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %and = and i32 %argc, 64
+ %tobool = icmp ne i32 %and, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp slt i8 %trunc2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main9(i32 %argc) {
+; CHECK-LABEL: @main9(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %and = and i32 %argc, 64
+ %tobool = icmp ne i32 %and, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp slt i8 %trunc2, 0
+ %or.cond = and i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main10(i32 %argc) {
+; CHECK-LABEL: @main10(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %and = and i32 %argc, 64
+ %tobool = icmp eq i32 %and, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp sge i8 %trunc2, 0
+ %or.cond = and i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main11(i32 %argc) {
+; CHECK-LABEL: @main11(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %and = and i32 %argc, 64
+ %tobool = icmp eq i32 %and, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp sge i8 %trunc2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
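+; In @main12 - @main15 both compares are sign-bit tests after truncation:
+; bit 15 of the i16 (32768) and bit 7 of the i8 (128), so the combined mask is
+; 32768 | 128 = 32896 (0x8080).
+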
+define i32 @main12(i32 %argc) {
+; CHECK-LABEL: @main12(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %trunc = trunc i32 %argc to i16
+ %tobool = icmp slt i16 %trunc, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp slt i8 %trunc2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main13(i32 %argc) {
+; CHECK-LABEL: @main13(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %trunc = trunc i32 %argc to i16
+ %tobool = icmp slt i16 %trunc, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp slt i8 %trunc2, 0
+ %or.cond = and i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main14(i32 %argc) {
+; CHECK-LABEL: @main14(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %trunc = trunc i32 %argc to i16
+ %tobool = icmp sge i16 %trunc, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp sge i8 %trunc2, 0
+ %or.cond = and i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
+
+define i32 @main15(i32 %argc) {
+; CHECK-LABEL: @main15(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896
+; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+ %trunc = trunc i32 %argc to i16
+ %tobool = icmp sge i16 %trunc, 0
+ %trunc2 = trunc i32 %argc to i8
+ %tobool3 = icmp sge i8 %trunc2, 0
+ %or.cond = or i1 %tobool, %tobool3
+ %retval.0 = select i1 %or.cond, i32 2, i32 1
+ ret i32 %retval.0
+}
diff --git a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
new file mode 100644
index 00000000000..b04308e10e2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -0,0 +1,239 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v64:64:64-v128:128:128-a0:0:64"
+
+
+
+; Cases that should be bitcast
+
+; Test cast between scalars with same bit sizes
+@alias_i32_to_f32 = alias float (float), bitcast (i32 (i32)* @func_i32 to float (float)*)
+
+; Test cast between vectors with same number of elements and bit sizes
+@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+
+; Test cast from vector to scalar with same number of bits
+@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+
+; Test cast from scalar to vector with same number of bits
+@alias_i64_to_v2f32 = alias i64 (i64), bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+
+; Test cast between vectors of pointers
+@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+
+
+; Cases that should be invalid and unchanged
+
+; Test cast between scalars with different bit sizes
+@alias_i64_to_f32 = alias float (float), bitcast (i64 (i64)* @func_i64 to float (float)*)
+
+; Test cast between vectors with different bit sizes but the
+; same number of elements
+@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+
+; Test cast between vectors with same number of bits and different
+; numbers of elements
+@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+
+; Test cast between scalar and vector with different number of bits
+@alias_i64_to_v4f32 = alias i64 (i64), bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+
+; Test cast between vector and scalar with different number of bits
+@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+
+; Test cast from scalar to vector of pointers with same number of bits
+; We don't know the pointer size at this point, so this can't be done
+@alias_i64_to_v2i32p = alias i64 (i64), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+
+; Test cast between vector of pointers and scalar with different number of bits
+@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+
+
+
+define internal <2 x i32> @func_v2i32(<2 x i32> %v) noinline nounwind {
+entry:
+ ret <2 x i32> %v
+}
+
+define internal <2 x float> @func_v2f32(<2 x float> %v) noinline nounwind {
+entry:
+ ret <2 x float> %v
+}
+
+define internal <4 x float> @func_v4f32(<4 x float> %v) noinline nounwind {
+entry:
+ ret <4 x float> %v
+}
+
+define internal i32 @func_i32(i32 %v) noinline nounwind {
+entry:
+ ret i32 %v
+}
+
+define internal i64 @func_i64(i64 %v) noinline nounwind {
+entry:
+ ret i64 %v
+}
+
+define internal <2 x i64> @func_v2i64(<2 x i64> %v) noinline nounwind {
+entry:
+ ret <2 x i64> %v
+}
+
+define internal <2 x i32*> @func_v2i32p(<2 x i32*> %v) noinline nounwind {
+entry:
+ ret <2 x i32*> %v
+}
+
+; Valid cases: only bitcast the argument / return types and call the underlying function
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32, i32*
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast float* %dest to i32*
+; CHECK: store i32
+ %tmp = load float, float* %source, align 8
+ %call = call float @alias_i32_to_f32(float %tmp) nounwind
+ store float %call, float* %dest, align 8
+ ret void
+}
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>, <2 x i32>*
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
+; CHECK: store <2 x i32>
+ %tmp = load <2 x float>, <2 x float>* %source, align 8
+ %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
+ store <2 x float> %call, <2 x float>* %dest, align 8
+ ret void
+}
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64, i64*
+; CHECK: %call = call i64 @func_i64
+; CHECK: bitcast <2 x float>* %dest to i64*
+; CHECK: store i64
+ %tmp = load <2 x float>, <2 x float>* %source, align 8
+ %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
+ store <2 x float> %call, <2 x float>* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>, <2 x float>*
+; CHECK: call <2 x float> @func_v2f32
+; CHECK: bitcast i64* %dest to <2 x float>*
+; CHECK: store <2 x float>
+ %tmp = load i64, i64* %source, align 8
+ %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
+ store i64 %call, i64* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>, <2 x i32*>*
+; CHECK: call <2 x i32*> @func_v2i32p
+; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
+; CHECK: store <2 x i32*>
+ %tmp = load <2 x i64*>, <2 x i64*>* %source, align 8
+ %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
+ store <2 x i64*> %call, <2 x i64*>* %dest, align 8
+ ret void
+}
+
+; Invalid cases:
+
+define void @bitcast_alias_mismatch_scalar_size(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_scalar_size
+; CHECK-NOT: fptoui
+; CHECK: @alias_i64_to_f32
+; CHECK-NOT: uitofp
+ %tmp = load float, float* %source, align 8
+ %call = call float @alias_i64_to_f32(float %tmp) nounwind
+ store float %call, float* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_mismatch_vector_element_and_bit_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_vector_element_and_bit_size
+; CHECK-NOT: fptoui <2 x float> %tmp to <2 x i64>
+; CHECK: @alias_v2i64_to_v2f32
+; CHECK-NOT: uitofp <2 x i64> %call to <2 x float>
+ %tmp = load <2 x float>, <2 x float>* %source, align 8
+ %call = call <2 x float> @alias_v2i64_to_v2f32(<2 x float> %tmp) nounwind
+ store <2 x float> %call, <2 x float>* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_vector_mismatched_number_elements(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_mismatched_number_elements
+; CHECK: %call = call <4 x float> @alias_v2i32_to_v4f32
+ %tmp = load <4 x float>, <4 x float>* %source, align 8
+ %call = call <4 x float> @alias_v2i32_to_v4f32(<4 x float> %tmp) nounwind
+ store <4 x float> %call, <4 x float>* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_vector_scalar_mismatched_bit_size(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_mismatched_bit_size
+; CHECK: %call = call <4 x float> @alias_v4f32_to_i64
+ %tmp = load <4 x float>, <4 x float>* %source, align 8
+ %call = call <4 x float> @alias_v4f32_to_i64(<4 x float> %tmp) nounwind
+ store <4 x float> %call, <4 x float>* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size(<4 x i32*>* noalias %source, <4 x i32*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size
+; CHECK: @alias_v4i32p_to_i64
+ %tmp = load <4 x i32*>, <4 x i32*>* %source, align 8
+ %call = call <4 x i32*> @alias_v4i32p_to_i64(<4 x i32*> %tmp) nounwind
+ store <4 x i32*> %call, <4 x i32*>* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_scalar_vector_ptrs_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_ptrs_same_size
+; CHECK: @alias_i64_to_v2i32p
+ %tmp = load i64, i64* %source, align 8
+ %call = call i64 @alias_i64_to_v2i32p(i64 %tmp) nounwind
+ store i64 %call, i64* %dest, align 8
+ ret void
+}
+
+define void @bitcast_alias_scalar_vector_mismatched_bit_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_mismatched_bit_size
+; CHECK: call i64 @alias_i64_to_v4f32
+ %tmp = load i64, i64* %source, align 8
+ %call = call i64 @alias_i64_to_v4f32(i64 %tmp) nounwind
+ store i64 %call, i64* %dest, align 8
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll b/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll
new file mode 100644
index 00000000000..0001fab8c16
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; These tests are extracted from bitcast.ll.
+; Verify that they also work correctly on big-endian targets.
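+; Because this target is big-endian, the vector lanes map to the opposite
+; halves of the wide integer, so the extract/insert indices and operand order
+; in these checks are swapped relative to bitcast.ll.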
+
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp23 = trunc i64 %tmp28 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i32> %B to i64
+ %tmp2 = trunc i64 %tmp to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+}
+
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp29 = lshr i64 %tmp28, 32
+ %tmp23 = trunc i64 %tmp29 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i64> %B to i128
+ %tmp1 = lshr i128 %tmp, 64
+ %tmp2 = trunc i128 %tmp1 to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+}
+
+define <2 x i32> @test4(i32 %A, i32 %B){
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp38 = zext i32 %A to i64
+ %tmp32 = zext i32 %B to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x i32>
+ ret <2 x i32> %tmp43
+}
+
+define <2 x float> @test5(float %A, float %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A:%.*]], i32 1
+; CHECK-NEXT: ret <2 x float> [[TMP2]]
+;
+ %tmp37 = bitcast float %A to i32
+ %tmp38 = zext i32 %tmp37 to i64
+ %tmp31 = bitcast float %B to i32
+ %tmp32 = zext i32 %tmp31 to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x float>
+ ret <2 x float> %tmp43
+}
+
+define <2 x float> @test6(float %A){
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 4.200000e+01>, float [[A:%.*]], i32 0
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
+;
+ %tmp23 = bitcast float %A to i32
+ %tmp24 = zext i32 %tmp23 to i64
+ %tmp25 = shl i64 %tmp24, 32
+ %mask20 = or i64 %tmp25, 1109917696
+ %tmp35 = bitcast i64 %mask20 to <2 x float>
+ ret <2 x float> %tmp35
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT: [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[T2]]
+;
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+ ret <2 x i32> %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 3
+; CHECK-NEXT: ret i64 [[T2]]
+;
+ %t1 = bitcast <2 x i32> %a to i64
+ %t2 = and i64 %t1, 3
+ ret i64 %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT: [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[T2]]
+;
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ret <2 x i32> %t2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/bitcast-bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast-bitcast.ll
new file mode 100644
index 00000000000..0f46ff53bc1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-bitcast.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check all scalar / vector combinations for a pair of bitcasts.
+
+define ppc_fp128 @bitcast_bitcast_s_s_s(i128 %a) {
+ %bc1 = bitcast i128 %a to fp128
+ %bc2 = bitcast fp128 %bc1 to ppc_fp128
+ ret ppc_fp128 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_s(
+; CHECK-NEXT: %bc2 = bitcast i128 %a to ppc_fp128
+; CHECK-NEXT: ret ppc_fp128 %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_s_v(i64 %a) {
+ %bc1 = bitcast i64 %a to double
+ %bc2 = bitcast double %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define double @bitcast_bitcast_s_v_s(i64 %a) {
+ %bc1 = bitcast i64 %a to <2 x i32>
+ %bc2 = bitcast <2 x i32> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_s(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_v_v(i64 %a) {
+ %bc1 = bitcast i64 %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define i64 @bitcast_bitcast_v_s_s(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to i64
+ ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to i64
+; CHECK-NEXT: ret i64 %bc2
+}
+
+define <4 x i16> @bitcast_bitcast_v_s_v(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to <4 x i16>
+ ret <4 x i16> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> %bc2
+}
+
+define double @bitcast_bitcast_v_v_s(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_v_v_v(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/bitcast-sext-vector.ll b/llvm/test/Transforms/InstCombine/bitcast-sext-vector.ll
new file mode 100644
index 00000000000..d70bdbaf372
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-sext-vector.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: sext
+; Don't fold zero/sign extensions with a bitcast between a vector and scalar.
+
+define i32 @t(<4 x i8> %src1, <4 x i8> %src2) nounwind readonly {
+entry:
+ %cmp = icmp eq <4 x i8> %src1, %src2; <<4 x i1>> [#uses=1]
+ %sext = sext <4 x i1> %cmp to <4 x i8>
+ %val = bitcast <4 x i8> %sext to i32
+ ret i32 %val
+}
diff --git a/llvm/test/Transforms/InstCombine/bitcast-store.ll b/llvm/test/Transforms/InstCombine/bitcast-store.ll
new file mode 100644
index 00000000000..2308d77be32
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-store.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Instcombine should preserve metadata and alignment while
+; folding a bitcast into a store.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.A = type { i32 (...)** }
+
+@G = external constant [5 x i8*]
+
+; CHECK-LABEL: @foo
+; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0, !llvm.access.group !1
+define void @foo(i32 %x, float* %p) nounwind {
+entry:
+ %x.cast = bitcast i32 %x to float
+ store float %x.cast, float* %p, align 16, !noalias !0, !llvm.access.group !1
+ ret void
+}
+
+; Check that instcombine doesn't try to fold the following bitcast into the store.
+; This transformation would not be safe, since it would require an addrspacecast,
+; and addrspacecast is not guaranteed to be a no-op cast.
+
+; CHECK-LABEL: @bar
+; CHECK: %cast = bitcast i8** %b to i8 addrspace(1)**
+; CHECK: store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+define void @bar(i8 addrspace(1)* %a, i8** %b) nounwind {
+entry:
+ %cast = bitcast i8** %b to i8 addrspace(1)**
+ store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+ ret void
+}
+
+; Check that we don't combine the bitcast into the store. This would create a
+; bitcast of the swifterror value, which is invalid.
+
+; CHECK-LABEL: @swifterror_store
+; CHECK: bitcast i64
+; CHECK: store %swift.error
+
+%swift.error = type opaque
+define void @swifterror_store(i64* %x, %swift.error** swifterror %err) {
+entry:
+ %casted = bitcast i64* %x to %swift.error*
+ store %swift.error* %casted, %swift.error** %err
+ ret void
+}
+
+!0 = !{!0}
+!1 = !{}
\ No newline at end of file
diff --git a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
new file mode 100644
index 00000000000..a92a7b73fd7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
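+; Bitcasts between a scalar and a one-element vector are canonicalized to
+; extractelement / insertelement (plus an element-type bitcast when needed),
+; as the checks below show.
+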
+define double @a(<1 x i64> %y) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT: [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
+; CHECK-NEXT: ret double [[C]]
+;
+ %c = bitcast <1 x i64> %y to double
+ ret double %c
+}
+
+define i64 @b(<1 x i64> %y) {
+; CHECK-LABEL: @b(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %c = bitcast <1 x i64> %y to i64
+ ret i64 %c
+}
+
+define <1 x i64> @c(double %y) {
+; CHECK-LABEL: @c(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double %y to i64
+; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT: ret <1 x i64> [[C]]
+;
+ %c = bitcast double %y to <1 x i64>
+ ret <1 x i64> %c
+}
+
+define <1 x i64> @d(i64 %y) {
+; CHECK-LABEL: @d(
+; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT: ret <1 x i64> [[C]]
+;
+ %c = bitcast i64 %y to <1 x i64>
+ ret <1 x i64> %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll
new file mode 100644
index 00000000000..0f0cbdb364a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast.ll
@@ -0,0 +1,563 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Bitcasts between vectors and scalars are valid.
+; PR4487
+define i32 @test1(i64 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 0
+;
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = bitcast i64 %a to <2 x i32>
+ %t3 = xor <2 x i32> %t1, %t2
+ %t4 = extractelement <2 x i32> %t3, i32 0
+ ret i32 %t4
+}
+
+; Perform the bitwise logic in the source type of the operands to eliminate bitcasts.
+
+define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: @xor_two_vector_bitcasts(
+; CHECK-NEXT: [[T31:%.*]] = xor <1 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = bitcast <1 x i64> [[T31]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[T3]]
+;
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = bitcast <1 x i64> %b to <2 x i32>
+ %t3 = xor <2 x i32> %t1, %t2
+ ret <2 x i32> %t3
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT: [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[T2]]
+;
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+ ret <2 x i32> %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 3
+; CHECK-NEXT: ret i64 [[T2]]
+;
+ %t1 = bitcast <2 x i32> %a to i64
+ %t2 = and i64 %t1, 3
+ ret i64 %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT: [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[T2]]
+;
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ret <2 x i32> %t2
+}
+
+; PR26702 - https://bugs.llvm.org//show_bug.cgi?id=26702
+; Bitcast is canonicalized above logic, so we can see the not-not pattern.
+
+define <2 x i64> @is_negative(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative(
+; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[NOTNOT:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[NOTNOT]]
+;
+ %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+ %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc = bitcast <4 x i32> %not to <2 x i64>
+ %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+ ret <2 x i64> %notnot
+}
+
+; This variation has an extra bitcast at the end. This means that the 2nd xor
+; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalization.
+
+define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative_bonus_bitcast(
+; CHECK-NEXT: [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[LOBIT]]
+;
+ %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+ %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc = bitcast <4 x i32> %not to <2 x i64>
+ %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+ %bc2 = bitcast <2 x i64> %notnot to <4 x i32>
+ ret <4 x i32> %bc2
+}
+
+; Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
+; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
+; CHECK-NEXT: ret <2 x i8> [[B]]
+;
+ %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
+ %b = bitcast <4 x i4> %a to <2 x i8>
+ ret <2 x i8> %b
+}
+
+; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925
+
+define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: @bitcasts_and_bitcast(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT: [[BC3:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[BC3]]
+;
+ %bc1 = bitcast <4 x i32> %a to <2 x i64>
+ %bc2 = bitcast <8 x i16> %b to <2 x i64>
+ %and = and <2 x i64> %bc2, %bc1
+ %bc3 = bitcast <2 x i64> %and to <4 x i32>
+ ret <4 x i32> %bc3
+}
+
+; The destination must have an integer element type.
+; FIXME: We can still eliminate one bitcast in this test by doing the logic op
+; in the type of the input that has an integer element type.
+
+define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) {
+; CHECK-LABEL: @bitcasts_and_bitcast_to_fp(
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]]
+; CHECK-NEXT: [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float>
+; CHECK-NEXT: ret <4 x float> [[BC3]]
+;
+ %bc1 = bitcast <4 x float> %a to <2 x i64>
+ %bc2 = bitcast <8 x i16> %b to <2 x i64>
+ %and = and <2 x i64> %bc2, %bc1
+ %bc3 = bitcast <2 x i64> %and to <4 x float>
+ ret <4 x float> %bc3
+}
+
+; FIXME: The transform is limited so it does not change a vector op into an integer op, to avoid codegen problems.
+
+define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcast_or_bitcast(
+; CHECK-NEXT: [[BC1:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[BC1]], [[B:%.*]]
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[OR]] to i128
+; CHECK-NEXT: ret i128 [[BC2]]
+;
+ %bc1 = bitcast i128 %a to <2 x i64>
+ %or = or <2 x i64> %b, %bc1
+ %bc2 = bitcast <2 x i64> %or to i128
+ ret i128 %bc2
+}
+
+; FIXME: The transform is limited so it does not change an integer op into a vector op, to avoid codegen problems.
+
+define <4 x i32> @bitcast_xor_bitcast(<4 x i32> %a, i128 %b) {
+; CHECK-LABEL: @bitcast_xor_bitcast(
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
+; CHECK-NEXT: [[XOR:%.*]] = xor i128 [[BC1]], [[B:%.*]]
+; CHECK-NEXT: [[BC2:%.*]] = bitcast i128 [[XOR]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[BC2]]
+;
+ %bc1 = bitcast <4 x i32> %a to i128
+ %xor = xor i128 %bc1, %b
+ %bc2 = bitcast i128 %xor to <4 x i32>
+ ret <4 x i32> %bc2
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=6137#c6
+
+define <4 x float> @bitcast_vector_select(<4 x float> %x, <2 x i64> %y, <4 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT: [[T7:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
+; CHECK-NEXT: ret <4 x float> [[T7]]
+;
+ %t4 = bitcast <4 x float> %x to <4 x i32>
+ %t5 = bitcast <2 x i64> %y to <4 x i32>
+ %t6 = select <4 x i1> %cmp, <4 x i32> %t4, <4 x i32> %t5
+ %t7 = bitcast <4 x i32> %t6 to <4 x float>
+ ret <4 x float> %t7
+}
+
+define float @bitcast_scalar_select_of_scalars(float %x, i32 %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_of_scalars(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[Y:%.*]] to float
+; CHECK-NEXT: [[T7:%.*]] = select i1 [[CMP:%.*]], float [[X:%.*]], float [[TMP1]]
+; CHECK-NEXT: ret float [[T7]]
+;
+ %t4 = bitcast float %x to i32
+ %t6 = select i1 %cmp, i32 %t4, i32 %y
+ %t7 = bitcast i32 %t6 to float
+ ret float %t7
+}
+
+; FIXME: We should change the select operand types to scalars, but we need to make
+; sure the backend can reverse that transform if needed.
+
+define float @bitcast_scalar_select_type_mismatch1(float %x, <4 x i8> %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_type_mismatch1(
+; CHECK-NEXT: [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT: [[T6:%.*]] = select i1 [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[Y:%.*]]
+; CHECK-NEXT: [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
+; CHECK-NEXT: ret float [[T7]]
+;
+ %t4 = bitcast float %x to <4 x i8>
+ %t6 = select i1 %cmp, <4 x i8> %t4, <4 x i8> %y
+ %t7 = bitcast <4 x i8> %t6 to float
+ ret float %t7
+}
+
+; FIXME: We should change the select operand types to vectors, but we need to make
+; sure the backend can reverse that transform if needed.
+
+define <4 x i8> @bitcast_scalar_select_type_mismatch2(<4 x i8> %x, float %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_type_mismatch2(
+; CHECK-NEXT: [[T4:%.*]] = bitcast <4 x i8> [[X:%.*]] to float
+; CHECK-NEXT: [[T6:%.*]] = select i1 [[CMP:%.*]], float [[T4]], float [[Y:%.*]]
+; CHECK-NEXT: [[T7:%.*]] = bitcast float [[T6]] to <4 x i8>
+; CHECK-NEXT: ret <4 x i8> [[T7]]
+;
+ %t4 = bitcast <4 x i8> %x to float
+ %t6 = select i1 %cmp, float %t4, float %y
+ %t7 = bitcast float %t6 to <4 x i8>
+ ret <4 x i8> %t7
+}
+
+define <4 x float> @bitcast_scalar_select_of_vectors(<4 x float> %x, <2 x i64> %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_of_vectors(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT: [[T7:%.*]] = select i1 [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
+; CHECK-NEXT: ret <4 x float> [[T7]]
+;
+ %t4 = bitcast <4 x float> %x to <4 x i32>
+ %t5 = bitcast <2 x i64> %y to <4 x i32>
+ %t6 = select i1 %cmp, <4 x i32> %t4, <4 x i32> %t5
+ %t7 = bitcast <4 x i32> %t6 to <4 x float>
+ ret <4 x float> %t7
+}
+
+; Can't change the type of the vector select if the dest type is scalar.
+
+define float @bitcast_vector_select_no_fold1(float %x, <2 x i16> %y, <4 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select_no_fold1(
+; CHECK-NEXT: [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT: [[T5:%.*]] = bitcast <2 x i16> [[Y:%.*]] to <4 x i8>
+; CHECK-NEXT: [[T6:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[T5]]
+; CHECK-NEXT: [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
+; CHECK-NEXT: ret float [[T7]]
+;
+ %t4 = bitcast float %x to <4 x i8>
+ %t5 = bitcast <2 x i16> %y to <4 x i8>
+ %t6 = select <4 x i1> %cmp, <4 x i8> %t4, <4 x i8> %t5
+ %t7 = bitcast <4 x i8> %t6 to float
+ ret float %t7
+}
+
+; Can't change the type of the vector select if the number of elements in the dest type is not the same.
+
+define <2 x float> @bitcast_vector_select_no_fold2(<2 x float> %x, <4 x i16> %y, <8 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select_no_fold2(
+; CHECK-NEXT: [[T4:%.*]] = bitcast <2 x float> [[X:%.*]] to <8 x i8>
+; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x i16> [[Y:%.*]] to <8 x i8>
+; CHECK-NEXT: [[T6:%.*]] = select <8 x i1> [[CMP:%.*]], <8 x i8> [[T4]], <8 x i8> [[T5]]
+; CHECK-NEXT: [[T7:%.*]] = bitcast <8 x i8> [[T6]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[T7]]
+;
+ %t4 = bitcast <2 x float> %x to <8 x i8>
+ %t5 = bitcast <4 x i16> %y to <8 x i8>
+ %t6 = select <8 x i1> %cmp, <8 x i8> %t4, <8 x i8> %t5
+ %t7 = bitcast <8 x i8> %t6 to <2 x float>
+ ret <2 x float> %t7
+}
+
+; Optimize bitcasts that are extracting the low element of a vector. This happens because of SRoA.
+; rdar://7892780
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 0
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
+ %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
+ %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]
+
+ %tmp = bitcast <2 x i32> %B to i64
+ %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+}
+
+; Optimize bitcasts that are extracting other elements of a vector. This happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 2
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp29 = lshr i64 %tmp28, 32
+ %tmp23 = trunc i64 %tmp29 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i64> %B to i128
+ %tmp1 = lshr i128 %tmp, 64
+ %tmp2 = trunc i128 %tmp1 to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+}
+
+; Both bitcasts are unnecessary; change the extractelement.
+
+define float @bitcast_extelt1(<2 x float> %A) {
+; CHECK-LABEL: @bitcast_extelt1(
+; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT: ret float [[BC2]]
+;
+ %bc1 = bitcast <2 x float> %A to <2 x i32>
+ %ext = extractelement <2 x i32> %bc1, i32 0
+ %bc2 = bitcast i32 %ext to float
+ ret float %bc2
+}
+
+; Second bitcast can be folded into the first.
+
+define i64 @bitcast_extelt2(<4 x float> %A) {
+; CHECK-LABEL: @bitcast_extelt2(
+; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x i64> [[BC]], i32 1
+; CHECK-NEXT: ret i64 [[BC2]]
+;
+ %bc1 = bitcast <4 x float> %A to <2 x double>
+ %ext = extractelement <2 x double> %bc1, i32 1
+ %bc2 = bitcast double %ext to i64
+ ret i64 %bc2
+}
+
+; TODO: This should return %A.
+
+define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
+; CHECK-LABEL: @bitcast_extelt3(
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64>
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i32 0
+; CHECK-NEXT: [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[BC2]]
+;
+ %bc1 = bitcast <2 x i32> %A to <1 x i64>
+ %ext = extractelement <1 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to <2 x i32>
+ ret <2 x i32> %bc2
+}
+
+; Handle the case where the input is not a vector.
+
+define double @bitcast_extelt4(i128 %A) {
+; CHECK-LABEL: @bitcast_extelt4(
+; CHECK-NEXT: [[BC:%.*]] = bitcast i128 [[A:%.*]] to <2 x double>
+; CHECK-NEXT: [[BC2:%.*]] = extractelement <2 x double> [[BC]], i32 0
+; CHECK-NEXT: ret double [[BC2]]
+;
+ %bc1 = bitcast i128 %A to <2 x i64>
+ %ext = extractelement <2 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to double
+ ret double %bc2
+}
+
+define <2 x i32> @test4(i32 %A, i32 %B){
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp38 = zext i32 %A to i64
+ %tmp32 = zext i32 %B to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x i32>
+ ret <2 x i32> %tmp43
+}
+
+; rdar://8360454
+define <2 x float> @test5(float %A, float %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B:%.*]], i32 1
+; CHECK-NEXT: ret <2 x float> [[TMP2]]
+;
+ %tmp37 = bitcast float %A to i32
+ %tmp38 = zext i32 %tmp37 to i64
+ %tmp31 = bitcast float %B to i32
+ %tmp32 = zext i32 %tmp31 to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x float>
+ ret <2 x float> %tmp43
+}
+
+define <2 x float> @test6(float %A){
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float 4.200000e+01, float undef>, float [[A:%.*]], i32 1
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
+;
+ %tmp23 = bitcast float %A to i32
+ %tmp24 = zext i32 %tmp23 to i64
+ %tmp25 = shl i64 %tmp24, 32
+ %mask20 = or i64 %tmp25, 1109917696
+ %tmp35 = bitcast i64 %mask20 to <2 x float>
+ ret <2 x float> %tmp35
+}
+
+define i64 @ISPC0(i64 %in) {
+; CHECK-LABEL: @ISPC0(
+; CHECK-NEXT: ret i64 0
+;
+ %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1> to i64), i64 -1)
+ ret i64 %out
+}
+
+
+define i64 @Vec2(i64 %in) {
+; CHECK-LABEL: @Vec2(
+; CHECK-NEXT: ret i64 0
+;
+ %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 0> to i64), i64 0)
+ ret i64 %out
+}
+
+define i64 @All11(i64 %in) {
+; CHECK-LABEL: @All11(
+; CHECK-NEXT: ret i64 0
+;
+ %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
+ ret i64 %out
+}
+
+
+define i32 @All111(i32 %in) {
+; CHECK-LABEL: @All111(
+; CHECK-NEXT: ret i32 0
+;
+ %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
+ ret i32 %out
+}
+
+define <2 x i16> @BitcastInsert(i32 %a) {
+; CHECK-LABEL: @BitcastInsert(
+; CHECK-NEXT: [[R:%.*]] = bitcast i32 [[A:%.*]] to <2 x i16>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %v = insertelement <1 x i32> undef, i32 %a, i32 0
+ %r = bitcast <1 x i32> %v to <2 x i16>
+ ret <2 x i16> %r
+}
+
+; PR17293
+define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i8*>* [[ARG:%.*]] to <2 x i64>*
+; CHECK-NEXT: [[LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[CAST]], align 16
+; CHECK-NEXT: ret <2 x i64> [[LOAD]]
+;
+ %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
+ %load = load <2 x i64>, <2 x i64>* %cast, align 16
+ ret <2 x i64> %load
+}
+
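+; On this little-endian target element 0 is the low bit, so the mask
+; <1,1,0,1,0,1,0,1> folds to 0b10101011 = 171 = -85 as a signed i8.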
+define i8 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i8 -85
+;
+ %res = bitcast <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true> to i8
+ ret i8 %res
+}
+
+@g = internal unnamed_addr global i32 undef
+
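+; The vector constants below fold to the equivalent double bit patterns on
+; this little-endian target, e.g. <i32 1234, i32 5678> becomes
+; 0x0000162E000004D2 (5678 = 0x162E in the high half, 1234 = 0x4D2 in the low half).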
+define void @constant_fold_vector_to_double() {
+; CHECK-LABEL: @constant_fold_vector_to_double(
+; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0xFFFFFFFFFFFFFFFF, double* undef, align 8
+; CHECK-NEXT: store volatile double 0x162E000004D2, double* undef, align 8
+; CHECK-NEXT: store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef, align 8
+; CHECK-NEXT: store volatile double 0x400000003F800000, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT: ret void
+;
+ store volatile double bitcast (<1 x i64> <i64 4607182418800017408> to double), double* undef
+ store volatile double bitcast (<2 x i32> <i32 0, i32 1072693248> to double), double* undef
+ store volatile double bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 16368> to double), double* undef
+ store volatile double bitcast (<8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 240, i8 63> to double), double* undef
+
+ store volatile double bitcast (<2 x i32> <i32 -1, i32 -1> to double), double* undef
+ store volatile double bitcast (<2 x i32> <i32 1234, i32 5678> to double), double* undef
+
+ store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef
+ store volatile double bitcast (<2 x float> <float 1.0, float 2.0> to double), double* undef
+
+ store volatile double bitcast (<2 x i32> zeroinitializer to double), double* undef
+ store volatile double bitcast (<4 x i16> zeroinitializer to double), double* undef
+ store volatile double bitcast (<8 x i8> zeroinitializer to double), double* undef
+ store volatile double bitcast (<16 x i4> zeroinitializer to double), double* undef
+ store volatile double bitcast (<32 x i2> zeroinitializer to double), double* undef
+ store volatile double bitcast (<64 x i1> zeroinitializer to double), double* undef
+ ret void
+}
+
+define void @constant_fold_vector_to_float() {
+; CHECK-LABEL: @constant_fold_vector_to_float(
+; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT: store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT: ret void
+;
+ store volatile float bitcast (<1 x i32> <i32 1065353216> to float), float* undef
+ store volatile float bitcast (<2 x i16> <i16 0, i16 16256> to float), float* undef
+ store volatile float bitcast (<4 x i8> <i8 0, i8 0, i8 128, i8 63> to float), float* undef
+ store volatile float bitcast (<32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0> to float), float* undef
+
+ ret void
+}
+
+define void @constant_fold_vector_to_half() {
+; CHECK-LABEL: @constant_fold_vector_to_half(
+; CHECK-NEXT: store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT: store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT: ret void
+;
+ store volatile half bitcast (<2 x i8> <i8 0, i8 64> to half), half* undef
+ store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/bitreverse-hang.ll b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll
new file mode 100644
index 00000000000..8e6585e995d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large, so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+; OR
+; | \
+; | LSHR
+; | /
+; OR
+; | \
+; | LSHR
+; | /
+; OR
+;
+; This results in exponential runtime. The loop here runs for 32 iterations, which
+; will completely hang compilation if we don't handle this case cleverly.
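+; (Roughly speaking, a matcher that recurses into both operands of every OR without
+; memoization does about 2^depth work, so a 32-deep OR chain is already intractable.)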
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+ %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %shr = lshr i32 %or4, 1
+ %or = or i32 %shr, %or4
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 32
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ store i32 %or, i32* @b, align 4, !tbaa !2
+ ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll
new file mode 100644
index 00000000000..cd1523a3b06
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitreverse-known-bits.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i8 @llvm.bitreverse.i8(i8)
+declare i32 @llvm.bitreverse.i32(i32)
+
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+define i1 @test1(i32 %arg) {
+ %a = or i32 %arg, 4294901760
+ %b = call i32 @llvm.bitreverse.i32(i32 %a)
+ %and = and i32 %b, 65535
+ %res = icmp eq i32 %and, 65535
+ ret i1 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+define i1 @test2(i32 %arg) {
+ %a = or i32 %arg, 1
+ %b = call i32 @llvm.bitreverse.i32(i32 %a)
+ %c = and i32 %b, 2147483648
+ %d = call i32 @llvm.bitreverse.i32(i32 %c)
+ %res = icmp eq i32 %d, 1
+ ret i1 %res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+define i1 @test3(i32 %arg) {
+ %a = or i32 %arg, 65536
+ %b = call i32 @llvm.bitreverse.i32(i32 %a)
+ %and = and i32 %b, 32768
+ %res = icmp eq i32 %and, 0
+ ret i1 %res
+}
+
+; CHECK-LABEL: @add_bitreverse
+; Make sure we process range metadata on bitreverse
+define i8 @add_bitreverse(i8 %a) {
+ %b = and i8 %a, 252
+ ; known bits for the bitreverse will say the result is in the range [0, 64)
+ ; but the metadata says [0, 16). So make sure the range metadata wins.
+ ; add %reverse, 1111 0000
+ ; should become
+ ; or %reverse, 1111 0000
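+  ; (The !range [0, 16) limits %reverse to its low 4 bits, while -16 is 0xF0, so the
+  ; two addends share no set bits, the add can never carry, and add == or here.)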
+ %reverse = call i8 @llvm.bitreverse.i8(i8 %b), !range !1
+ %c = add i8 %reverse, -16
+; CHECK: or i8 %reverse, -16
+ ret i8 %c
+}
+!1 = !{i8 0, i8 16}
diff --git a/llvm/test/Transforms/InstCombine/bittest.ll b/llvm/test/Transforms/InstCombine/bittest.ll
new file mode 100644
index 00000000000..edf65d5a87b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bittest.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -simplifycfg -S |\
+; RUN: not grep "call void @abort"
+
+@b_rec.0 = external global i32 ; <i32*> [#uses=2]
+
+define void @_Z12h000007_testv(i32* %P) {
+entry:
+ %tmp.2 = load i32, i32* @b_rec.0 ; <i32> [#uses=1]
+ %tmp.9 = or i32 %tmp.2, -989855744 ; <i32> [#uses=2]
+ %tmp.16 = and i32 %tmp.9, -805306369 ; <i32> [#uses=2]
+ %tmp.17 = and i32 %tmp.9, -973078529 ; <i32> [#uses=1]
+ store i32 %tmp.17, i32* @b_rec.0
+ %tmp.17.shrunk = bitcast i32 %tmp.16 to i32 ; <i32> [#uses=1]
+ %tmp.22 = and i32 %tmp.17.shrunk, -1073741824 ; <i32> [#uses=1]
+ %tmp.23 = icmp eq i32 %tmp.22, -1073741824 ; <i1> [#uses=1]
+ br i1 %tmp.23, label %endif.0, label %then.0
+
+then.0: ; preds = %entry
+ tail call void @abort( )
+ unreachable
+
+endif.0: ; preds = %entry
+ %tmp.17.shrunk2 = bitcast i32 %tmp.16 to i32 ; <i32> [#uses=1]
+ %tmp.27.mask = and i32 %tmp.17.shrunk2, 100663295 ; <i32> [#uses=1]
+ store i32 %tmp.27.mask, i32* %P
+ ret void
+}
+
+declare void @abort()
+
diff --git a/llvm/test/Transforms/InstCombine/branch.ll b/llvm/test/Transforms/InstCombine/branch.ll
new file mode 100644
index 00000000000..2168c9fd9a1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/branch.ll
@@ -0,0 +1,27 @@
+; Check that we fold the condition of branches of the
+; form: br <condition> dest1, dest2, where dest1 == dest2.
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test(i32 %x) {
+; CHECK-LABEL: @test
+entry:
+; CHECK-NOT: icmp
+; CHECK: br i1 false
+ %cmp = icmp ult i32 %x, 7
+ br i1 %cmp, label %merge, label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: ret i32 %x
+ ret i32 %x
+}
+
+@global = global i8 0
+
+define i32 @pat(i32 %x) {
+; CHECK-NOT: icmp false
+; CHECK: br i1 false
+ %y = icmp eq i32 27, ptrtoint(i8* @global to i32)
+ br i1 %y, label %patatino, label %patatino
+patatino:
+ ret i32 %x
+}
diff --git a/llvm/test/Transforms/InstCombine/broadcast.ll b/llvm/test/Transforms/InstCombine/broadcast.ll
new file mode 100644
index 00000000000..8485cd9c53f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/broadcast.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: good1
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good1(float %arg) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good2
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good2(float %arg) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 1
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 2
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 0
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good3
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good3(float %arg) {
+ %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good4
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[ADD:.*]] = fadd <4 x float> %[[INS]], %[[INS]]
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[ADD]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good4(float %arg) {
+ %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ %tmp7 = fadd <4 x float> %tmp6, %tmp6
+ ret <4 x float> %tmp7
+}
+
+; CHECK-LABEL: @good5(
+; CHECK-NEXT: %ins1 = insertelement <4 x float> undef, float %v, i32 0
+; CHECK-NEXT: %a1 = fadd <4 x float> %ins1, %ins1
+; CHECK-NEXT: %ins4 = shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %res = fadd <4 x float> %a1, %ins4
+; CHECK-NEXT: ret <4 x float> %res
+define <4 x float> @good5(float %v) {
+ %ins1 = insertelement <4 x float> undef, float %v, i32 0
+ %a1 = fadd <4 x float> %ins1, %ins1
+ %ins2 = insertelement<4 x float> %ins1, float %v, i32 1
+ %ins3 = insertelement<4 x float> %ins2, float %v, i32 2
+ %ins4 = insertelement<4 x float> %ins3, float %v, i32 3
+ %res = fadd <4 x float> %a1, %ins4
+ ret <4 x float> %res
+}
+
+; CHECK-LABEL: bad1
+; CHECK-NOT: shufflevector
+define <4 x float> @bad1(float %arg) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 1
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad2
+; CHECK-NOT: shufflevector
+define <4 x float> @bad2(float %arg) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 0
+ %tmp5 = insertelement <4 x float> %tmp, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad3
+; CHECK-NOT: shufflevector
+define <4 x float> @bad3(float %arg, float %arg2) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg2, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad4
+; CHECK-NOT: shufflevector
+define <1 x float> @bad4(float %arg) {
+ %tmp = insertelement <1 x float> undef, float %arg, i32 0
+ ret <1 x float> %tmp
+}
+
+; CHECK-LABEL: bad5
+; CHECK-NOT: shufflevector
+define <4 x float> @bad5(float %arg) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ %tmp7 = fadd <4 x float> %tmp6, %tmp4
+ ret <4 x float> %tmp7
+}
+
+; CHECK-LABEL: bad6
+; CHECK-NOT: shufflevector
+define <4 x float> @bad6(float %arg, i32 %k) {
+ %tmp = insertelement <4 x float> undef, float %arg, i32 0
+ %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 %k
+ %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+ ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: @bad7(
+; CHECK-NOT: shufflevector
+define <4 x float> @bad7(float %v) {
+ %ins1 = insertelement <4 x float> undef, float %v, i32 1
+ %a1 = fadd <4 x float> %ins1, %ins1
+ %ins2 = insertelement<4 x float> %ins1, float %v, i32 2
+ %ins3 = insertelement<4 x float> %ins2, float %v, i32 3
+ %ins4 = insertelement<4 x float> %ins3, float %v, i32 0
+ %res = fadd <4 x float> %a1, %ins4
+ ret <4 x float> %res
+}
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
new file mode 100644
index 00000000000..8fdecb628b8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -0,0 +1,337 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; rdar://5992453
+; A & 255
+define i32 @test4(i32 %a) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 %a, 255
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+ %tmp4 = lshr i32 %tmp2, 24
+ ret i32 %tmp4
+}
+
+; a >> 24
+define i32 @test6(i32 %a) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 %a, 24
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+ %tmp4 = and i32 %tmp2, 255
+ ret i32 %tmp4
+}
+
+; PR5284
+define i16 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 %A, 16
+; CHECK-NEXT: [[D:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[D]]
+;
+ %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
+ %C = trunc i32 %B to i16
+ %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+ ret i16 %D
+}
+
+define i16 @test8(i64 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 %A, 48
+; CHECK-NEXT: [[D:%.*]] = trunc i64 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[D]]
+;
+ %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
+ %C = trunc i64 %B to i16
+ %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+ ret i16 %D
+}
+
+; Misc: Fold bswap(undef) to undef.
+define i64 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: ret i64 undef
+;
+ %a = call i64 @llvm.bswap.i64(i64 undef)
+ ret i64 %a
+}
+
+; PR15782
+; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
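+; For example, in @bs_and16i below, 'bswap(x) & 10001' becomes 'bswap(x & 4391)', since
+; bswap16(10001) == bswap16(0x2711) == 0x1127 == 4391, matching the CHECK lines.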
+define i16 @bs_and16i(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16i(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, 4391
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+ %2 = and i16 %1, 10001
+ ret i16 %2
+}
+
+define i16 @bs_and16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+ %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+ %tmp3 = and i16 %tmp1, %tmp2
+ ret i16 %tmp3
+}
+
+define i16 @bs_or16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or16(
+; CHECK-NEXT: [[TMP1:%.*]] = or i16 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+ %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+ %tmp3 = or i16 %tmp1, %tmp2
+ ret i16 %tmp3
+}
+
+define i16 @bs_xor16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor16(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i16 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+ %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+ %tmp3 = xor i16 %tmp1, %tmp2
+ ret i16 %tmp3
+}
+
+define i32 @bs_and32i(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32i(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -1585053440
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ %tmp2 = and i32 %tmp1, 100001
+ ret i32 %tmp2
+}
+
+define i32 @bs_and32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+ %tmp3 = and i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @bs_or32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_or32(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+ %tmp3 = or i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i32 @bs_xor32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_xor32(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+ %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+ %tmp3 = xor i32 %tmp1, %tmp2
+ ret i32 %tmp3
+}
+
+define i64 @bs_and64i(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 %a, 129085117527228416
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = and i64 %tmp1, 1000000001
+ ret i64 %tmp2
+}
+
+define i64 @bs_and64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = and i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define i64 @bs_or64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_or64(
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = or i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define i64 @bs_xor64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_xor64(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = xor i64 %tmp1, %tmp2
+ ret i64 %tmp3
+}
+
+define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+ %tmp3 = or <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+ %tmp3 = xor <2 x i32> %tmp1, %tmp2
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32ivec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = and <2 x i32> %tmp1, <i32 100001, i32 100001>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32ivec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = or <2 x i32> %tmp1, <i32 100001, i32 100001>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32ivec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+ %tmp2 = xor <2 x i32> %tmp1, <i32 100001, i32 100001>
+ ret <2 x i32> %tmp2
+}
+
+define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = and i64 %tmp1, %tmp2
+ %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+ %tmp5 = mul i64 %tmp4, %tmp2 ; to increase use count of the bswaps
+ ret i64 %tmp5
+}
+
+define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse2(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[A]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = and i64 %tmp1, %tmp2
+ %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+ ret i64 %tmp4
+}
+
+define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse3(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+ %tmp3 = and i64 %tmp1, %tmp2
+ %tmp4 = mul i64 %tmp3, %tmp2 ; to increase use count of the bswaps
+ ret i64 %tmp4
+}
+
+define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i_multiuse(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1000000001
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = and i64 %tmp1, 1000000001
+ %tmp3 = mul i64 %tmp2, %tmp1 ; to increase use count of the bswap
+ ret i64 %tmp3
+}
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
diff --git a/llvm/test/Transforms/InstCombine/bswap-known-bits.ll b/llvm/test/Transforms/InstCombine/bswap-known-bits.ll
new file mode 100644
index 00000000000..1f3285af65c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bswap-known-bits.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; Note: This is testing functionality in computeKnownBits. I'd rather have
+; used instsimplify, but the bit-test folding is apparently only in instcombine.
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+ %a = or i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+ %a = or i16 %arg, 256
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+define i1 @test4(i32 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+ %a = or i32 %arg, 2147483647 ; i32_MAX
+ %b = call i32 @llvm.bswap.i32(i32 %a)
+ %and = and i32 %b, 127
+ %res = icmp eq i32 %and, 127
+ ret i1 %res
+}
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
new file mode 100644
index 00000000000..69be38df415
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -0,0 +1,232 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+define i32 @test1(i32 %i) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 %i)
+; CHECK-NEXT: ret i32 [[TMP12]]
+;
+ %tmp1 = lshr i32 %i, 24
+ %tmp3 = lshr i32 %i, 8
+ %tmp4 = and i32 %tmp3, 65280
+ %tmp5 = or i32 %tmp1, %tmp4
+ %tmp7 = shl i32 %i, 8
+ %tmp8 = and i32 %tmp7, 16711680
+ %tmp9 = or i32 %tmp5, %tmp8
+ %tmp11 = shl i32 %i, 24
+ %tmp12 = or i32 %tmp9, %tmp11
+ ret i32 %tmp12
+}
+
+define i32 @test2(i32 %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.bswap.i32(i32 %arg)
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+ %tmp2 = shl i32 %arg, 24
+ %tmp4 = shl i32 %arg, 8
+ %tmp5 = and i32 %tmp4, 16711680
+ %tmp6 = or i32 %tmp2, %tmp5
+ %tmp8 = lshr i32 %arg, 8
+ %tmp9 = and i32 %tmp8, 65280
+ %tmp10 = or i32 %tmp6, %tmp9
+ %tmp12 = lshr i32 %arg, 24
+ %tmp14 = or i32 %tmp10, %tmp12
+ ret i32 %tmp14
+}
+
+define i16 @test3(i16 %s) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 %s)
+; CHECK-NEXT: ret i16 [[TMP5]]
+;
+ %tmp2 = lshr i16 %s, 8
+ %tmp4 = shl i16 %s, 8
+ %tmp5 = or i16 %tmp2, %tmp4
+ ret i16 %tmp5
+}
+
+define i16 @test4(i16 %s) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 %s)
+; CHECK-NEXT: ret i16 [[TMP5]]
+;
+ %tmp2 = lshr i16 %s, 8
+ %tmp4 = shl i16 %s, 8
+ %tmp5 = or i16 %tmp4, %tmp2
+ ret i16 %tmp5
+}
+
+define i16 @test5(i16 %a) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT: ret i16 [[TMP_UPGRD_3]]
+;
+ %tmp = zext i16 %a to i32
+ %tmp1 = and i32 %tmp, 65280
+ %tmp2 = ashr i32 %tmp1, 8
+ %tmp2.upgrd.1 = trunc i32 %tmp2 to i16
+ %tmp4 = and i32 %tmp, 255
+ %tmp5 = shl i32 %tmp4, 8
+ %tmp5.upgrd.2 = trunc i32 %tmp5 to i16
+ %tmp.upgrd.3 = or i16 %tmp2.upgrd.1, %tmp5.upgrd.2
+ %tmp6 = bitcast i16 %tmp.upgrd.3 to i16
+ %tmp6.upgrd.4 = zext i16 %tmp6 to i32
+ %retval = trunc i32 %tmp6.upgrd.4 to i16
+ ret i16 %retval
+}
+
+; PR2842
+define i32 @test6(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %tmp = shl i32 %x, 16
+ %x.mask = and i32 %x, 65280
+ %tmp1 = lshr i32 %x, 16
+ %tmp2 = and i32 %tmp1, 255
+ %tmp3 = or i32 %x.mask, %tmp
+ %tmp4 = or i32 %tmp3, %tmp2
+ %tmp5 = shl i32 %tmp4, 8
+ %tmp6 = lshr i32 %x, 24
+ %tmp7 = or i32 %tmp5, %tmp6
+ ret i32 %tmp7
+}
+
+declare void @extra_use(i32)
+
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff)
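+; (For input bytes ABCD, swaphalf is CDAB; the two masked shifts then select and
+; reposition the bytes to give DCBA, the full byte swap.)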
+
+define i32 @bswap32_and_first(i32 %x) {
+; CHECK-LABEL: @bswap32_and_first(
+; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: ret i32 [[BSWAP]]
+;
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %swaphalf = or i32 %shl, %shr
+ %t = and i32 %swaphalf, 16711935
+ %tshl = shl nuw i32 %t, 8
+ %b = lshr i32 %swaphalf, 8
+ %band = and i32 %b, 16711935
+ %bswap = or i32 %tshl, %band
+ ret i32 %bswap
+}
+
+; Extra use should not prevent matching to bswap.
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff)
+
+define i32 @bswap32_and_first_extra_use(i32 %x) {
+; CHECK-LABEL: @bswap32_and_first_extra_use(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, 16
+; CHECK-NEXT: [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[T:%.*]] = and i32 [[SWAPHALF]], 16711935
+; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: call void @extra_use(i32 [[T]])
+; CHECK-NEXT: ret i32 [[BSWAP]]
+;
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %swaphalf = or i32 %shl, %shr
+ %t = and i32 %swaphalf, 16711935
+ %tshl = shl nuw i32 %t, 8
+ %b = lshr i32 %swaphalf, 8
+ %band = and i32 %b, 16711935
+ %bswap = or i32 %tshl, %band
+ call void @extra_use(i32 %t)
+ ret i32 %bswap
+}
+
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff)
+
+; PR23863
+define i32 @bswap32_shl_first(i32 %x) {
+; CHECK-LABEL: @bswap32_shl_first(
+; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: ret i32 [[BSWAP]]
+;
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %swaphalf = or i32 %shl, %shr
+ %t = shl i32 %swaphalf, 8
+ %tand = and i32 %t, -16711936
+ %b = lshr i32 %swaphalf, 8
+ %band = and i32 %b, 16711935
+ %bswap = or i32 %tand, %band
+ ret i32 %bswap
+}
+
+; Extra use should not prevent matching to bswap.
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff)
+
+define i32 @bswap32_shl_first_extra_use(i32 %x) {
+; CHECK-LABEL: @bswap32_shl_first_extra_use(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, 16
+; CHECK-NEXT: [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[T:%.*]] = shl i32 [[SWAPHALF]], 8
+; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT: call void @extra_use(i32 [[T]])
+; CHECK-NEXT: ret i32 [[BSWAP]]
+;
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %swaphalf = or i32 %shl, %shr
+ %t = shl i32 %swaphalf, 8
+ %tand = and i32 %t, -16711936
+ %b = lshr i32 %swaphalf, 8
+ %band = and i32 %b, 16711935
+ %bswap = or i32 %tand, %band
+ call void @extra_use(i32 %t)
+ ret i32 %bswap
+}
+
+define i16 @test8(i16 %a) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT: ret i16 [[REV]]
+;
+ %conv = zext i16 %a to i32
+ %shr = lshr i16 %a, 8
+ %shl = shl i32 %conv, 8
+ %conv1 = zext i16 %shr to i32
+ %or = or i32 %conv1, %shl
+ %conv2 = trunc i32 %or to i16
+ ret i16 %conv2
+}
+
+define i16 @test9(i16 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT: ret i16 [[REV]]
+;
+ %conv = zext i16 %a to i32
+ %shr = lshr i32 %conv, 8
+ %shl = shl i32 %conv, 8
+ %or = or i32 %shr, %shl
+ %conv2 = trunc i32 %or to i16
+ ret i16 %conv2
+}
+
+define i16 @test10(i32 %a) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 %a to i16
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]])
+; CHECK-NEXT: ret i16 [[REV]]
+;
+ %shr1 = lshr i32 %a, 8
+ %and1 = and i32 %shr1, 255
+ %and2 = shl i32 %a, 8
+ %shl1 = and i32 %and2, 65280
+ %or = or i32 %and1, %shl1
+ %conv = trunc i32 %or to i16
+ ret i16 %conv
+}
+
diff --git a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll
new file mode 100644
index 00000000000..eabe3a4c4b7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll
@@ -0,0 +1,117 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s --dump-input-on-failure
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i64 @weird_identity_but_ok(i64 %sz) {
+entry:
+ %call = tail call i8* @malloc(i64 %sz)
+ %calc_size = tail call i64 @llvm.objectsize.i64.p0i8(i8* %call, i1 false, i1 true, i1 true)
+ tail call void @free(i8* %call)
+ ret i64 %calc_size
+}
+
+; CHECK: define i64 @weird_identity_but_ok(i64 %sz)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 %sz
+; CHECK-NEXT: }
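+; The allocation returned by malloc(%sz) has exactly %sz accessible bytes, so the
+; dynamic objectsize call folds to its argument, as checked above.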
+
+define i64 @phis_are_neat(i1 %which) {
+entry:
+ br i1 %which, label %first_label, label %second_label
+
+first_label:
+ %first_call = call i8* @malloc(i64 10)
+ br label %join_label
+
+second_label:
+ %second_call = call i8* @malloc(i64 30)
+ br label %join_label
+
+join_label:
+ %joined = phi i8* [ %first_call, %first_label ], [ %second_call, %second_label ]
+ %calc_size = tail call i64 @llvm.objectsize.i64.p0i8(i8* %joined, i1 false, i1 true, i1 true)
+ ret i64 %calc_size
+}
+
+; CHECK: %0 = phi i64 [ 10, %first_label ], [ 30, %second_label ]
+; CHECK-NEXT: ret i64 %0
+
+define i64 @internal_pointer(i64 %sz) {
+entry:
+ %ptr = call i8* @malloc(i64 %sz)
+ %ptr2 = getelementptr inbounds i8, i8* %ptr, i32 2
+ %calc_size = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr2, i1 false, i1 true, i1 true)
+ ret i64 %calc_size
+}
+
+; CHECK: define i64 @internal_pointer(i64 %sz)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = add i64 %sz, -2
+; CHECK-NEXT: %1 = icmp ult i64 %sz, 2
+; CHECK-NEXT: %2 = select i1 %1, i64 0, i64 %0
+; CHECK-NEXT: ret i64 %2
+; CHECK-NEXT: }
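+; i.e. the bytes remaining past an interior pointer 2 bytes into a %sz-byte
+; allocation: %sz - 2, clamped to 0 when %sz < 2.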
+
+define i64 @uses_nullptr_no_fold() {
+entry:
+ %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 true, i1 true)
+ ret i64 %res
+}
+
+; CHECK: %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 true, i1 true)
+
+define i64 @uses_nullptr_fold() {
+entry:
+ ; NOTE: the third parameter to this call is false, unlike above.
+ %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 false, i1 true)
+ ret i64 %res
+}
+
+; CHECK: ret i64 0
+
+@d = common global i8 0, align 1
+@c = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @f() {
+entry:
+ %.pr = load i32, i32* @c, align 4
+ %tobool4 = icmp eq i32 %.pr, 0
+ br i1 %tobool4, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %dp.05 = phi i8* [ %add.ptr, %for.body ], [ @d, %entry ]
+ %0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* %dp.05, i1 false, i1 true, i1 true)
+ %conv = trunc i64 %0 to i32
+ tail call void @bury(i32 %conv) #3
+ %1 = load i32, i32* @c, align 4
+ %idx.ext = sext i32 %1 to i64
+ %add.ptr.offs = add i64 %idx.ext, 0
+ %2 = add i64 undef, %add.ptr.offs
+ %add.ptr = getelementptr inbounds i8, i8* %dp.05, i64 %idx.ext
+ %add = shl nsw i32 %1, 1
+ store i32 %add, i32* @c, align 4
+ %tobool = icmp eq i32 %1, 0
+ br i1 %tobool, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; CHECK: define void @f()
+; CHECK: call i64 @llvm.objectsize.i64.p0i8(
+
+declare void @bury(i32) local_unnamed_addr #2
+
+; Function Attrs: nounwind allocsize(0)
+declare i8* @malloc(i64)
+
+declare i8* @get_unknown_buffer()
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture)
+
+; Function Attrs: nounwind readnone speculatable
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
diff --git a/llvm/test/Transforms/InstCombine/builtin-object-size-offset.ll b/llvm/test/Transforms/InstCombine/builtin-object-size-offset.ll
new file mode 100644
index 00000000000..248cf644df8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/builtin-object-size-offset.ll
@@ -0,0 +1,58 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; #include <stdlib.h>
+; #include <stdio.h>
+;
+; int foo1(int N) {
+; char Big[20];
+; char Small[10];
+; char *Ptr = N ? Big + 10 : Small;
+; return __builtin_object_size(Ptr, 0);
+; }
+;
+; void foo() {
+; size_t ret;
+; ret = foo1(0);
+; printf("\n %d", ret);
+; }
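+; With __builtin_object_size type 0 (maximum remaining size), both possible pointees
+; leave 10 accessible bytes (Big + 10 has 10 of 20 bytes left, Small has all 10), so
+; the call folds to the constant 10 checked below.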
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"\0A %d\00", align 1
+
+define i32 @foo1(i32 %N) {
+entry:
+ %Big = alloca [20 x i8], align 16
+ %Small = alloca [10 x i8], align 1
+ %0 = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 0
+ call void @llvm.lifetime.start.p0i8(i64 20, i8* %0)
+ %1 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
+ call void @llvm.lifetime.start.p0i8(i64 10, i8* %1)
+ %tobool = icmp ne i32 %N, 0
+ %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 10
+ %cond = select i1 %tobool, i8* %add.ptr, i8* %1
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
+ %conv = trunc i64 %2 to i32
+ call void @llvm.lifetime.end.p0i8(i64 10, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 20, i8* %0)
+ ret i32 %conv
+; CHECK: ret i32 10
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @foo() {
+entry:
+ %call = tail call i32 @foo1(i32 0)
+ %conv = sext i32 %call to i64
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %conv)
+ ret void
+}
+
+declare i32 @printf(i8* nocapture readonly, ...)
+
diff --git a/llvm/test/Transforms/InstCombine/builtin-object-size-ptr.ll b/llvm/test/Transforms/InstCombine/builtin-object-size-ptr.ll
new file mode 100644
index 00000000000..ada3fc16702
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/builtin-object-size-ptr.ll
@@ -0,0 +1,34 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; int foo() {
+; struct V { char buf1[10];
+; int b;
+; char buf2[10];
+; } var;
+;
+; char *p = &var.buf1[1];
+; return __builtin_object_size (p, 0);
+; }
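+; struct V is 28 bytes in total (10 + 2 bytes padding + 4 + 10 + 2 bytes tail padding),
+; and p points 1 byte in, so __builtin_object_size(p, 0) is 27, as checked below.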
+
+%struct.V = type { [10 x i8], i32, [10 x i8] }
+
+define i32 @foo() #0 {
+entry:
+ %var = alloca %struct.V, align 4
+ %0 = bitcast %struct.V* %var to i8*
+ call void @llvm.lifetime.start.p0i8(i64 28, i8* %0) #3
+ %buf1 = getelementptr inbounds %struct.V, %struct.V* %var, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buf1, i64 0, i64 1
+ %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %arrayidx, i1 false)
+ %conv = trunc i64 %1 to i32
+ call void @llvm.lifetime.end.p0i8(i64 28, i8* %0) #3
+ ret i32 %conv
+; CHECK: ret i32 27
+; CHECK-NOT: ret i32 -1
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #2
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll
new file mode 100644
index 00000000000..1c15dc1c545
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cabs-array.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @std_cabs([2 x double] %z) {
+; CHECK-LABEL: define double @std_cabs(
+; CHECK: tail call double @cabs(
+ %call = tail call double @cabs([2 x double] %z)
+ ret double %call
+}
+
+define float @std_cabsf([2 x float] %z) {
+; CHECK-LABEL: define float @std_cabsf(
+; CHECK: tail call float @cabsf(
+ %call = tail call float @cabsf([2 x float] %z)
+ ret float %call
+}
+
+define fp128 @std_cabsl([2 x fp128] %z) {
+; CHECK-LABEL: define fp128 @std_cabsl(
+; CHECK: tail call fp128 @cabsl(
+ %call = tail call fp128 @cabsl([2 x fp128] %z)
+ ret fp128 %call
+}
+
+define double @fast_cabs([2 x double] %z) {
+; CHECK-LABEL: define double @fast_cabs(
+; CHECK: %real = extractvalue [2 x double] %z, 0
+; CHECK: %imag = extractvalue [2 x double] %z, 1
+; CHECK: %1 = fmul fast double %real, %real
+; CHECK: %2 = fmul fast double %imag, %imag
+; CHECK: %3 = fadd fast double %1, %2
+; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3)
+; CHECK: ret double %cabs
+ %call = tail call fast double @cabs([2 x double] %z)
+ ret double %call
+}
+
+define float @fast_cabsf([2 x float] %z) {
+; CHECK-LABEL: define float @fast_cabsf(
+; CHECK: %real = extractvalue [2 x float] %z, 0
+; CHECK: %imag = extractvalue [2 x float] %z, 1
+; CHECK: %1 = fmul fast float %real, %real
+; CHECK: %2 = fmul fast float %imag, %imag
+; CHECK: %3 = fadd fast float %1, %2
+; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3)
+; CHECK: ret float %cabs
+ %call = tail call fast float @cabsf([2 x float] %z)
+ ret float %call
+}
+
+define fp128 @fast_cabsl([2 x fp128] %z) {
+; CHECK-LABEL: define fp128 @fast_cabsl(
+; CHECK: %real = extractvalue [2 x fp128] %z, 0
+; CHECK: %imag = extractvalue [2 x fp128] %z, 1
+; CHECK: %1 = fmul fast fp128 %real, %real
+; CHECK: %2 = fmul fast fp128 %imag, %imag
+; CHECK: %3 = fadd fast fp128 %1, %2
+; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3)
+; CHECK: ret fp128 %cabs
+ %call = tail call fast fp128 @cabsl([2 x fp128] %z)
+ ret fp128 %call
+}
+
+declare double @cabs([2 x double])
+declare float @cabsf([2 x float])
+declare fp128 @cabsl([2 x fp128])
diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll
new file mode 100644
index 00000000000..405c073c194
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @std_cabs(double %real, double %imag) {
+; CHECK-LABEL: define double @std_cabs(
+; CHECK: tail call double @cabs(
+ %call = tail call double @cabs(double %real, double %imag)
+ ret double %call
+}
+
+define float @std_cabsf(float %real, float %imag) {
+; CHECK-LABEL: define float @std_cabsf(
+; CHECK: tail call float @cabsf(
+ %call = tail call float @cabsf(float %real, float %imag)
+ ret float %call
+}
+
+define fp128 @std_cabsl(fp128 %real, fp128 %imag) {
+; CHECK-LABEL: define fp128 @std_cabsl(
+; CHECK: tail call fp128 @cabsl(
+ %call = tail call fp128 @cabsl(fp128 %real, fp128 %imag)
+ ret fp128 %call
+}
+
+define double @fast_cabs(double %real, double %imag) {
+; CHECK-LABEL: define double @fast_cabs(
+; CHECK: %1 = fmul fast double %real, %real
+; CHECK: %2 = fmul fast double %imag, %imag
+; CHECK: %3 = fadd fast double %1, %2
+; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3)
+; CHECK: ret double %cabs
+ %call = tail call fast double @cabs(double %real, double %imag)
+ ret double %call
+}
+
+define float @fast_cabsf(float %real, float %imag) {
+; CHECK-LABEL: define float @fast_cabsf(
+; CHECK: %1 = fmul fast float %real, %real
+; CHECK: %2 = fmul fast float %imag, %imag
+; CHECK: %3 = fadd fast float %1, %2
+; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3)
+; CHECK: ret float %cabs
+ %call = tail call fast float @cabsf(float %real, float %imag)
+ ret float %call
+}
+
+define fp128 @fast_cabsl(fp128 %real, fp128 %imag) {
+; CHECK-LABEL: define fp128 @fast_cabsl(
+; CHECK: %1 = fmul fast fp128 %real, %real
+; CHECK: %2 = fmul fast fp128 %imag, %imag
+; CHECK: %3 = fadd fast fp128 %1, %2
+; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3)
+; CHECK: ret fp128 %cabs
+ %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag)
+ ret fp128 %call
+}
+
+declare double @cabs(double %real, double %imag)
+declare float @cabsf(float %real, float %imag)
+declare fp128 @cabsl(fp128 %real, fp128 %imag)
diff --git a/llvm/test/Transforms/InstCombine/call-callconv.ll b/llvm/test/Transforms/InstCombine/call-callconv.ll
new file mode 100644
index 00000000000..0cb2c55f9fd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-callconv.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that the non-default calling conv doesn't prevent the libcall simplification
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK-LABEL: @_abs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[I]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[I]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+ ret i32 %call
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK-LABEL: @_labs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[I]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[I]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+ ret i32 %call
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK-LABEL: @_strlen1(
+; CHECK-NEXT: ret i32 3
+;
+ %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0))
+ ret i32 %call
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK-LABEL: @_strlen2(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i8, i8* [[STR:%.*]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/call-cast-attrs.ll b/llvm/test/Transforms/InstCombine/call-cast-attrs.ll
new file mode 100644
index 00000000000..ddaf90c3e74
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-cast-attrs.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define signext i32 @b(i32* inreg %x) {
+ ret i32 0
+}
+
+define void @c(...) {
+ ret void
+}
+
+declare void @useit(i32)
+
+define void @d(i32 %x, ...) {
+ call void @useit(i32 %x)
+ ret void
+}
+
+define void @g(i32* %y) {
+ call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+ call void bitcast (void (...)* @c to void (i32*)*)(i32* %y)
+ call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+ call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)
+ ret void
+}
+; CHECK-LABEL: define void @g(i32* %y)
+; CHECK: call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+; CHECK: call void (...) @c(i32* %y)
+; CHECK: call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+; CHECK: call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)
diff --git a/llvm/test/Transforms/InstCombine/call-cast-target-inalloca.ll b/llvm/test/Transforms/InstCombine/call-cast-target-inalloca.ll
new file mode 100644
index 00000000000..90289e2468f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-cast-target-inalloca.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+declare void @takes_i32(i32)
+declare void @takes_i32_inalloca(i32* inalloca)
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+ %args = alloca inalloca i32
+ call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args)
+; CHECK: call void bitcast
+ ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+ call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
+; CHECK: call void bitcast
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/call-cast-target.ll b/llvm/test/Transforms/InstCombine/call-cast-target.ll
new file mode 100644
index 00000000000..881e80762ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-cast-target.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @main() {
+; CHECK-LABEL: @main
+; CHECK: %[[call:.*]] = call i8* @ctime(i32* null)
+; CHECK: %[[cast:.*]] = ptrtoint i8* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
+entry:
+ %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null ) ; <i32> [#uses=1]
+ ret i32 %tmp
+}
+
+declare i8* @ctime(i32*)
+
+define internal { i8 } @foo(i32*) {
+entry:
+ ret { i8 } { i8 0 }
+}
+
+define void @test_struct_ret() {
+; CHECK-LABEL: @test_struct_ret
+; CHECK-NOT: bitcast
+entry:
+ %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null)
+ ret void
+}
+
+declare i32 @fn1(i32)
+
+define i32 @test1(i32* %a) {
+; CHECK-LABEL: @test1
+; CHECK: %[[cast:.*]] = ptrtoint i32* %a to i32
+; CHECK-NEXT: %[[call:.*]] = tail call i32 @fn1(i32 %[[cast]])
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+ %call = tail call i32 bitcast (i32 (i32)* @fn1 to i32 (i32*)*)(i32* %a)
+ ret i32 %call
+}
+
+declare i32 @fn2(i16)
+
+define i32 @test2(i32* %a) {
+; CHECK-LABEL: @test2
+; CHECK: %[[call:.*]] = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+ %call = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+ ret i32 %call
+}
+
+declare i32 @fn3(i64)
+
+define i32 @test3(i32* %a) {
+; CHECK-LABEL: @test3
+; CHECK: %[[call:.*]] = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+ %call = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+ ret i32 %call
+}
+
+declare i32 @fn4(i32) "thunk"
+
+define i32 @test4(i32* %a) {
+; CHECK-LABEL: @test4
+; CHECK: %[[call:.*]] = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+ %call = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+ ret i32 %call
+}
+
+declare i1 @fn5({ i32, i32 }* byval align 4 %r)
+
+define i1 @test5() {
+; CHECK-LABEL: @test5
+; CHECK: %[[call:.*]] = call i1 bitcast (i1 ({ i32, i32 }*)* @fn5 to i1 (i32, i32)*)(i32 {{.*}}, i32 {{.*}})
+; CHECK-NEXT: ret i1 %[[call]]
+ %1 = alloca { i32, i32 }, align 4
+ %2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %1, i32 0, i32 0
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %1, i32 0, i32 1
+ %5 = load i32, i32* %4, align 4
+ %6 = call i1 bitcast (i1 ({ i32, i32 }*)* @fn5 to i1 (i32, i32)*)(i32 %3, i32 %5)
+ ret i1 %6
+}
diff --git a/llvm/test/Transforms/InstCombine/call-guard.ll b/llvm/test/Transforms/InstCombine/call-guard.ll
new file mode 100644
index 00000000000..8101f4571de
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-guard.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_guard_adjacent_same_cond(i1 %A) {
+; CHECK-LABEL: @test_guard_adjacent_same_cond(
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %A) [ "deopt"() ]
+; CHECK-NEXT: ret void
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+ ret void
+}
+
+define void @test_guard_adjacent_diff_cond(i1 %A, i1 %B, i1 %C) {
+; CHECK-LABEL: @test_guard_adjacent_diff_cond(
+; CHECK-NEXT: %1 = and i1 %A, %B
+; CHECK-NEXT: %2 = and i1 %1, %C
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+; CHECK-NEXT: ret void
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
+ ret void
+}
+
+; This version tests for the common form where the conditions are
+; between the guards
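+; While the guards are merged, the 'icmp sle %and, 128' below is also canonicalized
+; to 'icmp ult %and, 129', as the CHECK lines show.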
+define void @test_guard_adjacent_diff_cond2(i32 %V1, i32 %V2) {
+; CHECK-LABEL: @test_guard_adjacent_diff_cond2(
+; CHECK-NEXT: %1 = and i32 %V1, %V2
+; CHECK-NEXT: %2 = icmp slt i32 %1, 0
+; CHECK-NEXT: %and = and i32 %V1, 255
+; CHECK-NEXT: %C = icmp ult i32 %and, 129
+; CHECK-NEXT: %3 = and i1 %2, %C
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %3, i32 123) [ "deopt"() ]
+; CHECK-NEXT: ret void
+ %A = icmp slt i32 %V1, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ %B = icmp slt i32 %V2, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ %and = and i32 %V1, 255
+ %C = icmp sle i32 %and, 128
+ call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
+ ret void
+}
+
+; It might not be legal to hoist the load above the first guard, since the
+; guard might control dereferenceability.
+define void @negative_load(i32 %V1, i32* %P) {
+; CHECK-LABEL: @negative_load
+; CHECK: @llvm.experimental.guard
+; CHECK: @llvm.experimental.guard
+ %A = icmp slt i32 %V1, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ %V2 = load i32, i32* %P
+ %B = icmp slt i32 %V2, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ ret void
+}
+
+define void @deref_load(i32 %V1, i32* dereferenceable(4) %P) {
+; CHECK-LABEL: @deref_load
+; CHECK-NEXT: %V2 = load i32, i32* %P, align 4
+; CHECK-NEXT: %1 = and i32 %V2, %V1
+; CHECK-NEXT: %2 = icmp slt i32 %1, 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+ %A = icmp slt i32 %V1, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ %V2 = load i32, i32* %P
+ %B = icmp slt i32 %V2, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ ret void
+}
+
+; The divide might fault above the guard
+define void @negative_div(i32 %V1, i32 %D) {
+; CHECK-LABEL: @negative_div
+; CHECK: @llvm.experimental.guard
+; CHECK: @llvm.experimental.guard
+ %A = icmp slt i32 %V1, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ %V2 = udiv i32 %V1, %D
+ %B = icmp slt i32 %V2, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ ret void
+}
+
+; Highlight the limit of the merging window in a case which would otherwise be mergeable
+define void @negative_window(i32 %V1, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @negative_window
+; CHECK: @llvm.experimental.guard
+; CHECK: @llvm.experimental.guard
+ %A = icmp slt i32 %V1, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+ %V2 = add i32 %a, %b
+ %V3 = add i32 %V2, %c
+ %V4 = add i32 %V3, %d
+ %B = icmp slt i32 %V4, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+ ret void
+}
+
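The adjacent-guard tests in call-guard.ll above rely on one fact: widening guard(%A); guard(%B) into a single guard on "%A and %B" changes nothing about when the program deoptimizes, only about how much work sits between the checks. A minimal sketch of that equivalence (Python, illustrative only, not part of the test suite):

def separate_guards(a, b):
    # Original form: bail out ("deoptimize") at the first failing guard.
    if not a:
        return "deopt"
    if not b:
        return "deopt"
    return "continue"

def merged_guard(a, b):
    # Form after InstCombine merges adjacent guards into one 'and'-ed condition.
    if not (a and b):
        return "deopt"
    return "continue"

for a in (False, True):
    for b in (False, True):
        assert separate_guards(a, b) == merged_guard(a, b)
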
diff --git a/llvm/test/Transforms/InstCombine/call-intrinsics.ll b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
new file mode 100644
index 00000000000..1f327b99e9c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine | llvm-dis
+
+@X = global i8 0 ; <i8*> [#uses=3]
+@Y = global i8 12 ; <i8*> [#uses=2]
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+
+define void @zero_byte_test() {
+ ; These process zero bytes, so they are a noop.
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 128 @X, i8* align 128 @Y, i32 0, i1 false )
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 @X, i8* align 128 @Y, i32 0, i1 false )
+ call void @llvm.memset.p0i8.i32(i8* align 128 @X, i8 123, i32 0, i1 false )
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/call.ll b/llvm/test/Transforms/InstCombine/call.ll
new file mode 100644
index 00000000000..c494bfb62c7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call.ll
@@ -0,0 +1,300 @@
+; Ignore stderr, we expect warnings there
+; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Simple case, argument translatable without changing the value
+declare void @test1a(i8*)
+
+define void @test1(i32* %A) {
+; CHECK-LABEL: @test1(
+; CHECK: %1 = bitcast i32* %A to i8*
+; CHECK: call void @test1a(i8* %1)
+; CHECK: ret void
+ call void bitcast (void (i8*)* @test1a to void (i32*)*)( i32* %A )
+ ret void
+}
+
+
+; Should not be transformed because the address space of the parameter changes
+define void @test1_as1_illegal(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1_illegal(
+; CHECK: call void bitcast
+ call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A)
+ ret void
+}
+
+; Test1, but the argument has a different sized address-space
+declare void @test1a_as1(i8 addrspace(1)*)
+
+; This one is OK to perform
+define void @test1_as1(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)*
+; CHECK: call void @test1a_as1(i8 addrspace(1)* %1)
+; CHECK: ret void
+ call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A )
+ ret void
+}
+
+; More complex case: translate the argument because the callee is resolved. This
+; is safe because we have the body of the function
+define void @test2a(i8 %A) {
+; CHECK-LABEL: @test2a(
+; CHECK: ret void
+ ret void
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK: call void bitcast
+; CHECK: ret i32 %A
+ call void bitcast (void (i8)* @test2a to void (i32)*)( i32 %A )
+ ret i32 %A
+}
+
+
+; Resolving this should insert a cast from i8 to i32, following the C
+; promotion rules.
+define void @test3a(i8, ...) {unreachable }
+
+define void @test3(i8 %A, i8 %B) {
+; CHECK-LABEL: @test3(
+; CHECK: %1 = zext i8 %B to i32
+; CHECK: call void (i8, ...) @test3a(i8 %A, i32 %1)
+; CHECK: ret void
+ call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B)
+ ret void
+}
+
+; test conversion of return value...
+define i8 @test4a() {
+; CHECK-LABEL: @test4a(
+; CHECK: ret i8 0
+ ret i8 0
+}
+
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: call i32 bitcast
+ %X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( ) ; <i32> [#uses=1]
+ ret i32 %X
+}
+
+; test conversion of return value... no value conversion occurs so we can do
+; this with just a prototype...
+declare i32 @test5a()
+
+define i32 @test5() {
+; CHECK-LABEL: @test5(
+; CHECK: %X = call i32 @test5a()
+; CHECK: ret i32 %X
+ %X = call i32 @test5a( ) ; <i32> [#uses=1]
+ ret i32 %X
+}
+
+; test addition of new arguments...
+declare i32 @test6a(i32)
+
+define i32 @test6() {
+; CHECK-LABEL: @test6(
+; CHECK: %X = call i32 @test6a(i32 0)
+; CHECK: ret i32 %X
+ %X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( )
+ ret i32 %X
+}
+
+; test removal of arguments; this can only happen when we have the function body
+define void @test7a() {
+; CHECK-LABEL: @test7a(
+; CHECK: ret void
+ ret void
+}
+
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: call void @test7a()
+; CHECK: ret void
+ call void bitcast (void ()* @test7a to void (i32)*)( i32 5 )
+ ret void
+}
+
+
+; rdar://7590304
+declare void @test8a()
+
+define i8* @test8() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: invoke void @test8a()
+; Don't turn this into "unreachable": the callee and caller don't agree in
+; calling conv, but the implementation of test8a may actually end up using the
+; right calling conv.
+ invoke void @test8a()
+ to label %invoke.cont unwind label %try.handler
+
+invoke.cont: ; preds = %entry
+ unreachable
+
+try.handler: ; preds = %entry
+ %exn = landingpad {i8*, i32}
+ cleanup
+ ret i8* null
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+
+; Don't turn this into a direct call, because test9x is just a prototype and
+; doing so will make it varargs.
+; rdar://9038601
+declare i8* @test9x(i8*, i8*, ...) noredzone
+define i8* @test9(i8* %arg, i8* %tmp3) nounwind ssp noredzone {
+; CHECK-LABEL: @test9
+entry:
+ %call = call i8* bitcast (i8* (i8*, i8*, ...)* @test9x to i8* (i8*, i8*)*)(i8* %arg, i8* %tmp3) noredzone
+ ret i8* %call
+; CHECK-LABEL: @test9(
+; CHECK: call i8* bitcast
+}
+
+
+; Parameter that's a vector of pointers
+declare void @test10a(<2 x i8*>)
+
+define void @test10(<2 x i32*> %A) {
+; CHECK-LABEL: @test10(
+; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*>
+; CHECK: call void @test10a(<2 x i8*> %1)
+; CHECK: ret void
+ call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A)
+ ret void
+}
+
+; Don't transform because different address spaces
+declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>)
+
+define void @test10_mixed_as(<2 x i8*> %A) {
+; CHECK-LABEL: @test10_mixed_as(
+; CHECK: call void bitcast
+ call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A)
+ ret void
+}
+
+; Return type that's a pointer
+define i8* @test11a() {
+ ret i8* zeroinitializer
+}
+
+define i32* @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: %X = call i8* @test11a()
+; CHECK: %1 = bitcast i8* %X to i32*
+ %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)()
+ ret i32* %X
+}
+
+; Return type that's a pointer with a different address space
+define i8 addrspace(1)* @test11a_mixed_as() {
+ ret i8 addrspace(1)* zeroinitializer
+}
+
+define i8* @test11_mixed_as() {
+; CHECK-LABEL: @test11_mixed_as(
+; CHECK: call i8* bitcast
+ %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)()
+ ret i8* %X
+}
+
+; Return type that's a vector of pointers
+define <2 x i8*> @test12a() {
+ ret <2 x i8*> zeroinitializer
+}
+
+define <2 x i32*> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: %X = call <2 x i8*> @test12a()
+; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*>
+ %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)()
+ ret <2 x i32*> %X
+}
+
+define <2 x i8 addrspace(1)*> @test12a_mixed_as() {
+ ret <2 x i8 addrspace(1)*> zeroinitializer
+}
+
+define <2 x i8*> @test12_mixed_as() {
+; CHECK-LABEL: @test12_mixed_as(
+; CHECK: call <2 x i8*> bitcast
+ %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)()
+ ret <2 x i8*> %X
+}
+
+
+; Mix parameter that's a vector of integers and pointers of the same size
+declare void @test13a(<2 x i64>)
+
+define void @test13(<2 x i32*> %A) {
+; CHECK-LABEL: @test13(
+; CHECK: call void bitcast
+ call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A)
+ ret void
+}
+
+; Mix parameter that's a vector of integers and pointers of the same
+; size, but the other way around
+declare void @test14a(<2 x i8*>)
+
+define void @test14(<2 x i64> %A) {
+; CHECK-LABEL: @test14(
+; CHECK: call void bitcast
+ call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A)
+ ret void
+}
+
+
+; Return type that's a vector
+define <2 x i16> @test15a() {
+ ret <2 x i16> zeroinitializer
+}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: %X = call <2 x i16> @test15a()
+; CHECK: %1 = bitcast <2 x i16> %X to i32
+ %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( )
+ ret i32 %X
+}
+
+define i32 @test16a() {
+ ret i32 0
+}
+
+define <2 x i16> @test16() {
+; CHECK-LABEL: @test16(
+; CHECK: %X = call i32 @test16a()
+; CHECK: %1 = bitcast i32 %X to <2 x i16>
+ %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( )
+ ret <2 x i16> %X
+}
+
+declare i32 @pr28655(i32 returned %V)
+
+define i32 @test17() {
+entry:
+ %C = call i32 @pr28655(i32 0)
+ ret i32 %C
+}
+; CHECK-LABEL: @test17(
+; CHECK: call i32 @pr28655(i32 0)
+; CHECK: ret i32 0
+
+define void @non_vararg(i8*, i32) {
+ ret void
+}
+
+define void @test_cast_to_vararg(i8* %this) {
+; CHECK-LABEL: test_cast_to_vararg
+; CHECK: call void @non_vararg(i8* %this, i32 42)
+ call void (i8*, ...) bitcast (void (i8*, i32)* @non_vararg to void (i8*, ...)*)(i8* %this, i32 42)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/call2.ll b/llvm/test/Transforms/InstCombine/call2.ll
new file mode 100644
index 00000000000..70a5b3ce36e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call2.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine | llvm-dis
+
+; This used to crash trying to do a double-to-pointer conversion
+define i32 @bar() {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ %tmp = call i32 (...) bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval1
+}
+
+define i32 @f(i8* %p) {
+entry:
+ %p_addr = alloca i8* ; <i8**> [#uses=1]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ store i8* %p, i8** %p_addr
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval1
+}
diff --git a/llvm/test/Transforms/InstCombine/call_nonnull_arg.ll b/llvm/test/Transforms/InstCombine/call_nonnull_arg.ll
new file mode 100644
index 00000000000..8127f4734fc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call_nonnull_arg.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should mark the null-checked argument as nonnull at the callsite
+declare void @dummy(i32*, i32)
+
+define void @test(i32* %a, i32 %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND1:%.*]] = icmp eq i32* %a, null
+; CHECK-NEXT: br i1 [[COND1]], label %dead, label %not_null
+; CHECK: not_null:
+; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 %b, 0
+; CHECK-NEXT: br i1 [[COND2]], label %dead, label %not_zero
+; CHECK: not_zero:
+; CHECK-NEXT: call void @dummy(i32* nonnull %a, i32 %b)
+; CHECK-NEXT: ret void
+; CHECK: dead:
+; CHECK-NEXT: unreachable
+;
+entry:
+ %cond1 = icmp eq i32* %a, null
+ br i1 %cond1, label %dead, label %not_null
+not_null:
+ %cond2 = icmp eq i32 %b, 0
+ br i1 %cond2, label %dead, label %not_zero
+not_zero:
+ call void @dummy(i32* %a, i32 %b)
+ ret void
+dead:
+ unreachable
+}
+
+; The nonnull attribute in the 'bar' declaration is
+; propagated to the parameters of the 'baz' callsite.
+
+declare void @bar(i8*, i8* nonnull)
+declare void @baz(i8*, i8*)
+
+define void @deduce_nonnull_from_another_call(i8* %a, i8* %b) {
+; CHECK-LABEL: @deduce_nonnull_from_another_call(
+; CHECK-NEXT: call void @bar(i8* %a, i8* %b)
+; CHECK-NEXT: call void @baz(i8* nonnull %b, i8* nonnull %b)
+; CHECK-NEXT: ret void
+;
+ call void @bar(i8* %a, i8* %b)
+ call void @baz(i8* %b, i8* %b)
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll b/llvm/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll
new file mode 100644
index 00000000000..b7a1d1d3fb7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll
@@ -0,0 +1,99 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @foo(i8*)
+declare void @bar(i8 addrspace(1)*)
+
+define void @nonnullAfterBitCast() {
+entry:
+ %i = alloca i32, align 4
+ %tmp1 = bitcast i32* %i to i8*
+; CHECK: call void @foo(i8* nonnull %tmp1)
+ call void @foo(i8* %tmp1)
+ ret void
+}
+
+define void @nonnullAfterSExt(i8 %a) {
+entry:
+ %b = zext i8 %a to i32 ; <- %b is >= 0
+ %c = add nsw nuw i32 %b, 2 ; <- %c is > 0
+ %sext = sext i32 %c to i64 ; <- %sext cannot be 0 because %c is not 0
+ %i2p = inttoptr i64 %sext to i8* ; <- no-op int2ptr cast
+; CHECK: call void @foo(i8* nonnull %i2p)
+ call void @foo(i8* %i2p)
+ ret void
+}
+
+define void @nonnullAfterZExt(i8 %a) {
+entry:
+ %b = zext i8 %a to i32 ; <- %b is >= 0
+ %c = add nsw nuw i32 %b, 2 ; <- %c is > 0
+ %zext = zext i32 %c to i64 ; <- %zext cannot be 0 because %c is not 0
+ %i2p = inttoptr i64 %zext to i8* ; <- no-op int2ptr cast
+; CHECK: call void @foo(i8* nonnull %i2p)
+ call void @foo(i8* %i2p)
+ ret void
+}
+
+declare void @llvm.assume(i1 %b)
+
+define void @nonnullAfterInt2Ptr(i32 %u, i64 %lu) {
+entry:
+ %nz = sdiv exact i32 100, %u ; %nz cannot be null
+ %i2p = inttoptr i32 %nz to i8* ; extending int2ptr as sizeof(i32) < sizeof(i8*)
+; CHECK: call void @foo(i8* nonnull %i2p)
+ call void @foo(i8* %i2p)
+
+ %nz.2 = sdiv exact i64 100, %lu ; %nz.2 cannot be null
+ %i2p.2 = inttoptr i64 %nz.2 to i8* ; no-op int2ptr as sizeof(i64) == sizeof(i8*)
+; CHECK: call void @foo(i8* nonnull %i2p.2)
+ call void @foo(i8* %i2p.2)
+ ret void
+}
+
+define void @nonnullAfterPtr2Int() {
+entry:
+ %a = alloca i32
+ %p2i = ptrtoint i32* %a to i64 ; no-op ptr2int as sizeof(i32*) == sizeof(i64)
+ %i2p = inttoptr i64 %p2i to i8*
+; CHECK: call void @foo(i8* nonnull %i2p)
+ call void @foo(i8* %i2p)
+ ret void
+}
+
+define void @maybenullAfterInt2Ptr(i128 %llu) {
+entry:
+ %cmp = icmp ne i128 %llu, 0
+ call void @llvm.assume(i1 %cmp) ; %llu != 0
+ %i2p = inttoptr i128 %llu to i8* ; truncating int2ptr as sizeof(i128) > sizeof(i8*)
+; CHECK: call void @foo(i8* %i2p)
+ call void @foo(i8* %i2p)
+ ret void
+}
+
+define void @maybenullAfterPtr2Int() {
+entry:
+ %a = alloca i32
+ %p2i = ptrtoint i32* %a to i32 ; truncating ptr2int as sizeof(i32*) > sizeof(i32)
+ %i2p = inttoptr i32 %p2i to i8*
+; CHECK: call void @foo(i8* %i2p)
+ call void @foo(i8* %i2p)
+ ret void
+}
+
+define void @maybenullAfterAddrspacecast(i8* nonnull %p) {
+entry:
+ %addrspcast = addrspacecast i8* %p to i8 addrspace(1)*
+
+; An address space cast can be "a no-op cast or a complex value modification,
+; depending on the target and the address space pair". As a consequence, we
+; cannot simply assume non-nullness of %p is preserved by the cast.
+;
+; CHECK: call void @bar(i8 addrspace(1)* %addrspcast)
+ call void @bar(i8 addrspace(1)* %addrspcast)
+
+; CHECK: call void @foo(i8* nonnull %p)
+ call void @foo(i8* %p)
+ ret void
+}
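The nonnullAfterSExt/nonnullAfterZExt cases above rest on a small value-range argument: zext of an i8 is 0..255, the nuw/nsw add of 2 gives 2..257 and can never be zero, and extending a non-zero i32 to i64 stays non-zero, so the no-op inttoptr must produce a nonnull pointer. The maybenull cases fail exactly because truncation can drop all of the set bits. A quick exhaustive check of both points (Python, illustrative only, not part of the test suite):

for a in range(256):               # every possible i8 value
    b = a                          # %b = zext i8 %a to i32    -> 0..255
    c = b + 2                      # %c = add nuw nsw i32 %b, 2 -> 2..257, never 0
    assert 0 < c < 2**31           # non-zero, no signed/unsigned overflow
    assert (c & (2**64 - 1)) != 0  # zext/sext to i64 is still non-zero

# Truncation, by contrast, can yield null even from a non-zero value:
# an i128 equal to 2**64 is non-zero, but its low 64 bits are all zero.
assert 2**64 != 0 and (2**64 & (2**64 - 1)) == 0
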
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
new file mode 100644
index 00000000000..484779bd6fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
@@ -0,0 +1,359 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+; https://reviews.llvm.org/D46760#1123713
+
+; Pattern:
+; x >> y << y
+; Should be transformed into:
+; x & (-1 << y)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i32 @positive_samevar(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 -1, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr i32 %x, %y
+ %ret = shl i32 %tmp0, %y
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst(i32 %x) {
+; CHECK-LABEL: @positive_sameconst(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -32
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp0 = ashr i32 %x, 5
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerashr(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X:%.*]], 10
+; CHECK-NEXT: [[RET:%.*]] = shl nsw i32 [[TMP0]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr i32 %x, 10
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl(i32 %x) {
+; CHECK-LABEL: @positive_biggershl(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP1]], 10
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr i32 %x, 5
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; EXACT on the first shift
+; ============================================================================ ;
+
+define i32 @positive_samevar_ashrexact(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar_ashrexact(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = ashr exact i32 %x, %y
+ %ret = shl i32 %tmp0, %y ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_ashrexact(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = ashr exact i32 %x, 5
+ %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggerashr_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr_ashrexact(
+; CHECK-NEXT: [[RET:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr exact i32 %x, 10
+ %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_ashrexact(
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr exact i32 %x, 5
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_ashrexact_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_ashrexact_shlnuw(
+; CHECK-NEXT: [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr exact i32 %x, 5
+ %ret = shl nuw i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vector
+; ============================================================================ ;
+
+define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @positive_samevar_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = ashr <2 x i32> %x, %y
+ %ret = shl <2 x i32> %tmp0, %y
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -32, i32 -32>
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef2(
+; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 -32, i32 undef, i32 -32>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerashr_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl nsw <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = ashr <2 x i32> %x, <i32 10, i32 10>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 10, i32 10, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggershl_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP1]], <i32 10, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 10, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Positive multi-use tests with constant
+; ============================================================================ ;
+
+; FIXME: drop 'exact' once it is no longer needed.
+
+define i32 @positive_sameconst_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %tmp0 = ashr exact i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerashr_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 10
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = ashr exact i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr exact i32 %x, 10
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr exact i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Constant Non-Splat Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_biggerashr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLashr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLashr_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = ashr <2 x i32> %x, <i32 5, i32 10>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_twovars(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr i32 %x, %y
+  %ret = shl i32 %tmp0, %z ; %z, not %y
+ ret i32 %ret
+}
+
+declare void @use32(i32)
+
+; The fold only fires when the first shift has a single use; the extra use blocks it.
+define i32 @negative_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_oneuse(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP0]], [[Y]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = ashr i32 %x, %y
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, %y
+ ret i32 %ret
+}
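The pattern documented at the top of canonicalize-ashr-shl-to-masking.ll is a pure bit identity: shifting right and then left by the same amount y simply clears the low y bits, which is exactly what masking with -1 << y does. A brute-force check over every i8 value and shift amount (Python, illustrative only; the helper name is mine):

def ashr8(x, y):
    # Arithmetic shift right of an 8-bit value given as an unsigned byte 0..255.
    s = x - 256 if x & 0x80 else x
    return (s >> y) & 0xFF

for x in range(256):
    for y in range(8):
        lhs = (ashr8(x, y) << y) & 0xFF   # (x ashr y) shl y, truncated to i8
        rhs = x & (0xFF << y) & 0xFF      # x & (-1 << y) in i8
        assert lhs == rhs, (x, y)
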
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
new file mode 100644
index 00000000000..f46bcdfc997
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C == x
+; Should be transformed into:
+; x u<= C
+; Iff: isPowerOf2(C + 1)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp eq i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp eq <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp eq <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp eq <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp eq i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp eq i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp eq i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp eq i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp eq i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp eq <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
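The "Iff: isPowerOf2(C + 1)" side condition above is what makes C a mask of low bits, and for such a C the equality x & C == x is the same as an unsigned range check against C. An exhaustive i8 check of that claim (Python, illustrative only, not part of the test suite):

for c in range(256):
    if (c + 1) & c:                      # keep only C where C + 1 is a power of two
        continue
    for x in range(256):
        assert ((x & c) == x) == (x <= c), (x, c)   # x & C == x  <=>  x u<= C
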
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
new file mode 100644
index 00000000000..850266381e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C != x
+; Should be transformed into:
+; x u> C
+; Iff: isPowerOf2(C + 1)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ne i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp ne <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp ne <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp ne <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ne i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ne i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp ne i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp ne i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ne i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp ne <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
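This file is the negated form of the previous one: for the same low-bit-mask constants, x & C != x holds exactly when x is unsigned-greater than C. The matching exhaustive i8 check (Python, illustrative only):

for c in range(256):
    if (c + 1) & c:                      # C + 1 must be a power of two (low-bit mask)
        continue
    for x in range(256):
        assert ((x & c) != x) == (x > c), (x, c)    # x & C != x  <=>  x u> C
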
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
new file mode 100644
index 00000000000..ca1b86c0623
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C s>= x
+; Should be transformed into:
+; x s<= C
+; Iff: isPowerOf2(C + 1)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp sge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp sge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp sge <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp sge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sge i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; OK, this one should fold; we are only testing the commutativity of 'and'.
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp sge i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[X]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp sge i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[X]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp sge i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp sge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sge i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp sge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+; ============================================================================ ;
+; Potential miscompiles.
+; ============================================================================ ;
+
+define i1 @nv(i8 %x, i8 %y) {
+; CHECK-LABEL: @nv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp sge i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n3_vec(<2 x i8> %x) {
+; CHECK-LABEL: @n3_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 -1>
+; CHECK-NEXT: [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 -1>
+ %ret = icmp sge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @n4_vec(<3 x i8> %x) {
+; CHECK-LABEL: @n4_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 -1>
+; CHECK-NEXT: [[RET:%.*]] = icmp sge <3 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[RET]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1>
+ %ret = icmp sge <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
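For this signed variant only the non-negative low-bit masks (0, 1, 3, ..., 127) qualify, and the "Potential miscompiles" tests above show why C = -1 has to stay excluded. A brute-force i8 check of both facts (Python, illustrative only; s8 is a helper of mine):

def s8(v):
    # Reinterpret an unsigned byte 0..255 as a signed i8 value.
    return v - 256 if v & 0x80 else v

masks = [c for c in range(128) if ((c + 1) & c) == 0]   # 0, 1, 3, 7, ..., 127

for c in masks:
    for x in range(256):
        assert (s8(x & c) >= s8(x)) == (s8(x) <= c), (x, c)   # (x & C) s>= x  <=>  x s<= C

# C = -1 (0xff) is not a valid mask: x & -1 == x makes the left side always true,
# while "x s<= -1" is false for every non-negative x.
assert any((s8(x & 0xFF) >= s8(x)) != (s8(x) <= -1) for x in range(256))
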
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
new file mode 100644
index 00000000000..299ee78b7db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x s> x & C
+; Should be transformed into:
+; x s> C
+; Iff: isPowerOf2(C + 1)
+
+; NOTE: this pattern is not commutative!
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sgt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp sgt i8 %x, %tmp1
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp sgt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 15>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp sgt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %x = call <3 x i8> @gen3x8()
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp sgt <3 x i8> %x, %tmp0
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp sgt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sgt i8 %tmp0, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; OK, this one should fold; we are only testing the commutativity of 'and'.
+define i1 @cv0_GOOD(i8 %y) {
+; CHECK-LABEL: @cv0_GOOD(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp sgt i8 %x, %tmp1
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0
+ %ret = icmp sgt i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp sgt i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp sgt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sgt i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp sgt <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp sgt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
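Same family, strict signed form: x s> (x & C) becomes x s> C for the non-negative low-bit masks, and as the NOTE at the top says, the pattern is not commutative, so only this operand order is checked. A brute-force i8 check (Python, illustrative only; s8 is a helper of mine):

def s8(v):
    # Reinterpret an unsigned byte 0..255 as a signed i8 value.
    return v - 256 if v & 0x80 else v

for c in [m for m in range(128) if ((m + 1) & m) == 0]:   # 0, 1, 3, 7, ..., 127
    for x in range(256):
        assert (s8(x) > s8(x & c)) == (s8(x) > c), (x, c)  # x s> (x & C)  <=>  x s> C
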
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
new file mode 100644
index 00000000000..11596fc7540
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x s<= x & C
+; Should be transformed into:
+; x s<= C
+; Iff: isPowerOf2(C + 1)
+
+; NOTE: this pattern is not commutative!
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sle i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp sle i8 %x, %tmp1
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp sle <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 16>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp sle <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i8> [[X]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %x = call <3 x i8> @gen3x8()
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp sle <3 x i8> %x, %tmp0
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp sle i8 %x, %tmp0
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sle i8 %tmp0, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; OK, this one should fold. We are only testing commutativity of 'and'.
+define i1 @cv0_GOOD(i8 %y) {
+; CHECK-LABEL: @cv0_GOOD(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp sle i8 %x, %tmp1
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0
+ %ret = icmp sle i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp sle i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp sle i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp sle i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp sle i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp sle <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp sle <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
new file mode 100644
index 00000000000..2957ad5731c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C s< x
+; Should be transformed into:
+; x s> C
+; Iff: isPowerOf2(C + 1)
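+
+; Informal sanity check, not part of the test (C = 3, C+1 = 4 is a power of two):
+; for x s> 3 the mask drops a set high bit, so (x & 3) s< x holds; for 0 s<= x s<= 3
+; the mask is a no-op and the compare fails; for negative x, (x & 3) is non-negative,
+; hence s>= x, and it fails too. So '(x & 3) s< x' is exactly 'x s> 3'.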
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp slt i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp slt <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp slt <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp slt <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp slt i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp slt i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; OK, this one should fold. We are only testing commutativity of 'and'.
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp slt i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp slt i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[X]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp slt i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp slt i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp slt i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp slt <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+; ============================================================================ ;
+; Potential miscompiles.
+; ============================================================================ ;
+
+define i1 @nv(i8 %x, i8 %y) {
+; CHECK-LABEL: @nv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp slt i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n3(<2 x i8> %x) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 -1>
+; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 -1>
+ %ret = icmp slt <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @n4(<3 x i8> %x) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 -1>
+; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[RET]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1>
+ %ret = icmp slt <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
new file mode 100644
index 00000000000..f17d6b47a08
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C u>= x
+; Should be transformed into:
+; x u<= C
+; Iff: isPowerOf2(C + 1)
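+
+; Informal sanity check, not part of the test (C = 3): masking can only clear bits,
+; so (x & 3) u<= x always; it stays u>= x exactly when no bit was cleared, i.e. when
+; x already fits in the low two bits, i.e. x u<= 3 (equivalently x u< 4).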
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp uge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp uge i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp uge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp uge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp uge <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+; The pattern is not commutative. instsimplify will already take care of it.
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp uge i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp uge i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp uge i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp uge i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp uge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp uge i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp uge i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp uge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp uge <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
new file mode 100644
index 00000000000..7512b72f2a7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x u> x & C
+; Should be transformed into:
+; x u> C
+; Iff: isPowerOf2(C + 1)
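+
+; Informal sanity check, not part of the test (C = 3): (x & 3) u<= x always holds,
+; and x u> (x & 3) exactly when the mask cleared some set bit of x, i.e. when x has
+; a bit set above bit 1, i.e. x u> 3.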
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ugt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ugt i8 %x, %tmp1
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp ugt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 15>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp ugt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <3 x i8> [[X]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %x = call <3 x i8> @gen3x8()
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp ugt <3 x i8> %x, %tmp0
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ugt i8 %tmp0, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp ugt i8 %x, %tmp1
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 false
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0
+ %ret = icmp ugt i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp ugt i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp ugt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp ugt i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ugt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ugt i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp ugt <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp ugt <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
new file mode 100644
index 00000000000..0bab709618d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x u<= x & C
+; Should be transformed into:
+; x u<= C
+; Iff: isPowerOf2(C + 1)
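+
+; Informal sanity check, not part of the test (C = 15, C+1 = 16 is a power of two):
+; (x & 15) u<= x always, so x u<= (x & 15) holds only when the two are equal, i.e.
+; when x has no bits above bit 3, i.e. x u<= 15 (equivalently x u< 16).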
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ule i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ule i8 %x, %tmp1
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp ule <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 16>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp ule <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <3 x i8> [[X]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %x = call <3 x i8> @gen3x8()
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp ule <3 x i8> %x, %tmp0
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ule i8 %tmp0, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp ule i8 %x, %tmp1
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 true
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0
+ %ret = icmp ule i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x ; swapped order
+ %ret = icmp ule i8 %tmp1, %x ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp ule i8 %x, %tmp0
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp ule i8 %x, %tmp0
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ule i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ule i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp ule <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = call <2 x i8> @gen2x8()
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp ule <2 x i8> %x, %tmp0
+ ret <2 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
new file mode 100644
index 00000000000..287e3699932
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & C u< x
+; Should be transformed into:
+; x u> C
+; Iff: isPowerOf2(C + 1)
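+
+; Informal sanity check, not part of the test (C = 15): (x & 15) u<= x always, so
+; (x & 15) u< x holds exactly when the mask cleared a set bit, i.e. when x has a
+; bit set above bit 3, i.e. x u> 15.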
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ult i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ult i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+ %ret = icmp ult <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+ %ret = icmp ult <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+ %ret = icmp ult <3 x i8> %tmp0, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+; The pattern is not commutative. instsimplify will already take care of it.
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 false
+;
+ %x = call i8 @gen8()
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ult i8 %x, %tmp0 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ult i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 false
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ult i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: ret i1 false
+;
+ %x = call i8 @gen8()
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ult i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = and i8 %x, 3
+ call void @use8(i8 %tmp0)
+ %ret = icmp ult i8 %tmp0, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+ %ret = icmp ult i8 %tmp0, %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = and i8 %x, 3
+ %ret = icmp ult i8 %tmp0, %notx ; not %x
+ ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT: [[RET:%.*]] = icmp ult <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+ %ret = icmp ult <2 x i8> %tmp0, %x
+ ret <2 x i1> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
new file mode 100644
index 00000000000..60aa4d444ca
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38149
+
+; Pattern:
+; ((%x << MaskedBits) a>> MaskedBits) == %x
+; Should be transformed into:
+; (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+; Where KeptBits = bitwidth(%x) - MaskedBits
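+
+; Informal sanity check, not part of the test (i8, MaskedBits = 5, KeptBits = 3):
+; the shl/ashr round trip reproduces %x exactly when %x is a sign-extended 3-bit
+; value, i.e. %x is in [-4, 3]; the replacement '(%x + 4) u< 8' describes the same
+; range, matching the 'add ..., 4' / 'icmp ult ..., 8' lines in the checks below.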
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
+define i1 @pb(i65 %x) {
+; CHECK-LABEL: @pb(
+; CHECK-NEXT: [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i65 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i65 %x, 1
+ %tmp1 = ashr exact i65 %tmp0, 1
+ %tmp2 = icmp eq i65 %x, %tmp1
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 5>
+ %tmp2 = icmp eq <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 6>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 6>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 6>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 6>
+ %tmp2 = icmp eq <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 5, i8 5>
+ %tmp2 = icmp eq <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p4_vec_undef1(<3 x i8> %x) {
+; CHECK-LABEL: @p4_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 5, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+ %tmp2 = icmp eq <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p5_vec_undef2(<3 x i8> %x) {
+; CHECK-LABEL: @p5_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+ %tmp2 = icmp eq <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp eq i8 %x, %tmp1 ; swapped order
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @n_oneuse0(i8 %x) {
+; CHECK-LABEL: @n_oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ call void @use8(i8 %tmp0)
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n_oneuse1(i8 %x) {
+; CHECK-LABEL: @n_oneuse1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ call void @use8(i8 %tmp1)
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n_oneuse2(i8 %x) {
+; CHECK-LABEL: @n_oneuse2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ call void @use8(i8 %tmp0)
+ %tmp1 = ashr exact i8 %tmp0, 5
+ call void @use8(i8 %tmp1)
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 3 ; not 5
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n1(i8 %x) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 8
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = lshr exact i8 %tmp0, 5 ; not ashr
+ %tmp2 = icmp eq i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp eq i8 %tmp1, %y ; not %x
+ ret i1 %tmp2
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 3> ; 3 instead of 5
+ %tmp2 = icmp eq <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
new file mode 100644
index 00000000000..dc5fbebc230
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & (-1 >> y) == x
+; Should be transformed into:
+; x u<= (-1 >> y)
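+
+; Informal sanity check, not part of the test (y = 4 on i8): -1 lshr 4 is 0x0f, and
+; 'x & 0x0f == x' says the top four bits of x are clear, which is precisely
+; 'x u<= 0x0f'.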
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <2 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = lshr <2 x i8> <i8 -1, i8 -1>, %y
+ %tmp1 = and <2 x i8> %tmp0, %x
+ %ret = icmp eq <2 x i8> %tmp1, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <3 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %tmp1 = and <3 x i8> %tmp0, %x
+ %ret = icmp eq <3 x i8> %tmp1, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp eq i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ call void @use8(i8 %tmp0)
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ call void @use8(i8 %tmp1)
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ call void @use8(i8 %tmp0)
+ %tmp1 = and i8 %tmp0, %x
+ call void @use8(i8 %tmp1)
+ %ret = icmp eq i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp eq i8 %tmp1, %notx ; not %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
new file mode 100644
index 00000000000..535c628234a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & (-1 >> y) != x
+; Should be transformed into:
+; x u> (-1 >> y)
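+
+; Informal sanity check, not part of the test (y = 6 on i8): -1 lshr 6 is 0x03, and
+; 'x & 0x03 != x' says some bit of x above bit 1 is set, which is precisely
+; 'x u> 0x03'.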
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %tmp0 = lshr <2 x i8> <i8 -1, i8 -1>, %y
+ %tmp1 = and <2 x i8> %tmp0, %x
+ %ret = icmp ne <2 x i8> %tmp1, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <3 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %tmp1 = and <3 x i8> %tmp0, %x
+ %ret = icmp ne <3 x i8> %tmp1, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %x = call i8 @gen8()
+ %tmp1 = and i8 %x, %tmp0 ; swapped order
+ %ret = icmp ne i8 %x, %tmp1 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ call void @use8(i8 %tmp0)
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ call void @use8(i8 %tmp1)
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = lshr i8 -1, %y
+ call void @use8(i8 %tmp0)
+ %tmp1 = and i8 %tmp0, %x
+ call void @use8(i8 %tmp1)
+ %ret = icmp ne i8 %tmp1, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %tmp0 = lshr i8 -1, %y
+ %tmp1 = and i8 %tmp0, %x
+ %ret = icmp ne i8 %tmp1, %notx ; not %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
new file mode 100644
index 00000000000..42b7b3e9359
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ~(-1 << y) == x
+; Should be transformed into:
+; x u<= ~(-1 << y)
+; That is then later transformed into:
+; (x >> y) == 0
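+
+; Informal sanity check, not part of the test (y = 3 on i8): -1 shl 3 is 0xf8, its
+; complement is 0x07, and 'x & 0x07 == x' says the bits of x at or above bit 3 are
+; clear; that is both 'x u<= 0x07' and '(x >> 3) == 0'.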
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp eq <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse3(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse4(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse5(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse5(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y ; not -1
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, 1 ; not -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
new file mode 100644
index 00000000000..2826d2d33c1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ~(-1 << y) != x
+; Should be transformed into:
+; x u> ~(-1 << y)
+; That is then later transformed into:
+; (x >> y) != 0
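+;
+; A worked sketch with concrete values (assuming i8 and y = 3):
+;   ~(-1 << 3) = 0x07 = 0000 0111   (the low-bit mask)
+;   x & 0x07 != x holds exactly when some bit of x above bit 2 is set,
+;   i.e. x u> 0x07, i.e. (x >> 3) != 0.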
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp ne <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse3(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse4(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse5(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse5(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y ; not -1
+ %t1 = xor i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ %t1 = xor i8 %t0, 1 ; not -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
new file mode 100644
index 00000000000..af7700c8473
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ((1 << y) + (-1)) == x
+; Should be transformed into:
+; x u<= ((1 << y) + (-1))
+; That is then later transformed into:
+; (x >> y) == 0
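+;
+; A worked sketch with concrete values (assuming i8 and y = 3):
+;   (1 << 3) + (-1) = 8 - 1 = 0x07 = 0000 0111   (the same low-bit mask as ~(-1 << 3))
+;   x & 0x07 == x holds exactly when x u<= 0x07, i.e. (x >> 3) == 0.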
+
+; This pattern is non-canonical, but we cannot canonicalize it due to the extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %y
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp eq <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y ; not 1
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, 1 ; not -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
new file mode 100644
index 00000000000..dfe5b713284
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ((1 << y) + (-1)) != x
+; Should be transformed into:
+; x u> ((1 << y) + (-1))
+; That is then later transformed into:
+; (x >> y) != 0
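+;
+; A worked sketch with concrete values (assuming i8 and y = 3):
+;   (1 << 3) + (-1) = 0x07 = 0000 0111   (the low-bit mask)
+;   x & 0x07 != x holds exactly when x u> 0x07, i.e. (x >> 3) != 0.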
+
+; This pattern is non-canonical, but we cannot canonicalize it due to the extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %y
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp ne <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y ; not 1
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, 1 ; not -1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
new file mode 100644
index 00000000000..7a32c5e3f03
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ((-1 << y) >> y) == x
+; Should be transformed into:
+; x u<= ((-1 << y) >> y)
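+;
+; A worked sketch with concrete values (assuming i8 and y = 3):
+;   -1 << 3          = 0xF8 = 1111 1000
+;   (-1 << 3) >> 3   = 0x1F = 0001 1111   (lshr; a mask of the low 8 - 3 = 5 bits)
+;   x & 0x1F == x holds exactly when x u<= 0x1F.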
+
+; This pattern is non-canonical, but we cannot canonicalize it due to the extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <2 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = lshr <2 x i8> %t0, %y
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp eq <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <3 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = lshr <3 x i8> %t0, %y
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp eq <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp eq i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = lshr i8 %t0, %y
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y ; not -1
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y1, i8 %y2) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y1:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y2:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y1 ; not %y2
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y2 ; not %y1
+ %t2 = and i8 %t1, %x
+ %ret = icmp eq i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
new file mode 100644
index 00000000000..86a91679a26
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+; x & ((-1 << y) >> y) != x
+; Should be transformed into:
+; x u> ((-1 << y) >> y)
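+;
+; A worked sketch with concrete values (assuming i8 and y = 3):
+;   (-1 << 3) >> 3 = 0x1F = 0001 1111   (lshr; a mask of the low 8 - 3 = 5 bits)
+;   x & 0x1F != x holds exactly when x u> 0x1F.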
+
+; This pattern is non-canonical, but we cannot canonicalize it due to the extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i8> [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = lshr <2 x i8> %t0, %y
+ %t2 = and <2 x i8> %t1, %x
+ %ret = icmp ne <2 x i8> %t2, %x
+ ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> [[T0]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <3 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = lshr <3 x i8> %t0, %y
+ %t2 = and <3 x i8> %t1, %x
+ %ret = icmp ne <3 x i8> %t2, %x
+ ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %x = call i8 @gen8()
+ %t2 = and i8 %x, %t1 ; swapped order
+ %ret = icmp ne i8 %x, %t2 ; swapped order
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = lshr i8 %t0, %y
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0) ; needed anyway
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ call void @use8(i8 %t1)
+ %t2 = and i8 %t1, %x
+ call void @use8(i8 %t2)
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %notx ; not %x
+ ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 1, %y ; not -1
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y1, i8 %y2) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[Y1:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = lshr i8 [[T0]], [[Y2:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %t0 = shl i8 -1, %y1 ; not %y2
+ call void @use8(i8 %t0)
+ %t1 = lshr i8 %t0, %y2 ; not %y1
+ %t2 = and i8 %t1, %x
+ %ret = icmp ne i8 %t2, %x
+ ret i1 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
new file mode 100644
index 00000000000..a1ba4e38525
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
@@ -0,0 +1,359 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+; https://reviews.llvm.org/D46760#1123713
+
+; Pattern:
+; x >> y << y
+; Should be transformed into:
+; x & (-1 << y)
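+;
+; A worked sketch with concrete values (assuming i8, x = 0xAB, y = 3, and lshr for x >> y):
+;   (0xAB >> 3) << 3 = 0x15 << 3 = 0xA8
+;   0xAB & (-1 << 3) = 0xAB & 0xF8 = 0xA8
+;   Both forms clear the low y bits of x.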
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i32 @positive_samevar(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 -1, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr i32 %x, %y
+ %ret = shl i32 %tmp0, %y
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst(i32 %x) {
+; CHECK-LABEL: @positive_sameconst(
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X:%.*]], -32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+ %tmp0 = lshr i32 %x, 5
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerlshr(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X:%.*]], 10
+; CHECK-NEXT: [[RET:%.*]] = shl nuw nsw i32 [[TMP0]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr i32 %x, 10
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl(i32 %x) {
+; CHECK-LABEL: @positive_biggershl(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP0]], 10
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr i32 %x, 5
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; EXACT on the first shift
+; ============================================================================ ;
+
+define i32 @positive_samevar_lshrexact(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar_lshrexact(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = lshr exact i32 %x, %y
+ %ret = shl i32 %tmp0, %y ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_lshrexact(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = lshr exact i32 %x, 5
+ %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggerlshr_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr_lshrexact(
+; CHECK-NEXT: [[RET:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr exact i32 %x, 10
+ %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_lshrexact(
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr exact i32 %x, 5
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_lshrexact_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_lshrexact_shlnuw(
+; CHECK-NEXT: [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr exact i32 %x, 5
+ %ret = shl nuw i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vector
+; ============================================================================ ;
+
+define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @positive_samevar_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = lshr <2 x i32> %x, %y
+ %ret = shl <2 x i32> %tmp0, %y
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[X:%.*]], <i32 -32, i32 -32>
+; CHECK-NEXT: ret <2 x i32> [[TMP0]]
+;
+ %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef2(
+; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 -32, i32 undef, i32 -32>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerlshr_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl nuw nsw <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = lshr <2 x i32> %x, <i32 10, i32 10>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 10, i32 10, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggershl_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 10, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 10, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Positive multi-use tests with constant
+; ============================================================================ ;
+
+; FIXME: drop 'exact' once it is no longer needed.
+
+define i32 @positive_sameconst_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %tmp0 = lshr exact i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerlshr_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 10
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = lshr exact i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr exact i32 %x, 10
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggershl_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr exact i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Constant Non-Splat Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_biggerlshr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLlshr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLlshr_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = lshr <2 x i32> %x, <i32 5, i32 10>
+ %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_twovars(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr i32 %x, %y
+  %ret = shl i32 %tmp0, %z ; %z, not %y
+ ret i32 %ret
+}
+
+declare void @use32(i32)
+
+; One use only.
+define i32 @negative_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_oneuse(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = shl i32 [[TMP0]], [[Y]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = lshr i32 %x, %y
+ call void @use32(i32 %tmp0)
+ %ret = shl i32 %tmp0, %y
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
new file mode 100644
index 00000000000..9de0b337de2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+
+; Pattern:
+; x << y >> y
+; Should be transformed into:
+; x & (-1 >> y)
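+;
+; Illustrative worked example (not part of the autogenerated checks): for an
+; i8 value x = 0xAB and y = 4,
+;   (0xAB << 4) u>> 4 = 0xB0 u>> 4 = 0x0B
+;   0xAB & (-1 u>> 4) = 0xAB & 0x0F = 0x0B
+; so both forms keep only the low (bitwidth - y) bits.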
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i32 @positive_samevar(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 -1, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, %y
+ %ret = lshr i32 %tmp0, %y
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst(i32 %x) {
+; CHECK-LABEL: @positive_sameconst(
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[X:%.*]], 134217727
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+ %tmp0 = shl i32 %x, 5
+ %ret = lshr i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerShl(i32 %x) {
+; CHECK-LABEL: @positive_biggerShl(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], 134217696
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 10
+ %ret = lshr i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerLshr(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], 4194303
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 5
+ %ret = lshr i32 %tmp0, 10
+ ret i32 %ret
+}
+
+define i32 @positive_biggerLshr_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr_lshrexact(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], 4194303
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 5
+ %ret = lshr exact i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; NUW on the first shift
+; ============================================================================ ;
+
+define i32 @positive_samevar_shlnuw(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar_shlnuw(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = shl nuw i32 %x, %y
+ %ret = lshr i32 %tmp0, %y ; this one is obviously 'exact'.
+ ret i32 %ret
+}
+
+define i32 @positive_sameconst_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_shlnuw(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %tmp0 = shl nuw i32 %x, 5
+ %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggerShl_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggerShl_shlnuw(
+; CHECK-NEXT: [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl nuw i32 %x, 10
+ %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+ ret i32 %ret
+}
+
+define i32 @positive_biggerLshr_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr_shlnuw(
+; CHECK-NEXT: [[RET:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl nuw i32 %x, 5
+ %ret = lshr i32 %tmp0, 10
+ ret i32 %ret
+}
+
+define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr_shlnuw_lshrexact(
+; CHECK-NEXT: [[RET:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl nuw i32 %x, 5
+ %ret = lshr exact i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vector
+; ============================================================================ ;
+
+define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @positive_samevar_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = shl <2 x i32> %x, %y
+ %ret = lshr <2 x i32> %tmp0, %y
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec(
+; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
+; CHECK-NEXT: ret <2 x i32> [[TMP0]]
+;
+ %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
+ %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef2(
+; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 134217727, i32 undef, i32 134217727>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerShl_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 134217696, i32 134217696>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = shl <2 x i32> %x, <i32 10, i32 10>
+ %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerShl_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerShl_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 10, i32 10, i32 10>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerShl_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
+ %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+ ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLshr_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 4194303, i32 4194303>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
+ %ret = lshr <2 x i32> %tmp0, <i32 10, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLshr_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLshr_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLshr_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+ %ret = lshr <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Positive multi-use tests with constant
+; ============================================================================ ;
+
+define i32 @positive_sameconst_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[X]], 134217727
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerShl_shlnuw_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerShl_shlnuw_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 10
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = shl nuw i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl nuw i32 %x, 10
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, 5
+ ret i32 %ret
+}
+
+define i32 @positive_biggerLshr_shlnuw_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr_shlnuw_multiuse(
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = lshr i32 [[X]], 5
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl nuw i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; NOTE: creates one extra instruction, but this seems intentional.
+define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
+; CHECK-LABEL: @positive_biggerShl_multiuse_extrainstr(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[X:%.*]], 10
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 5
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], 134217696
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 10
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, 5
+ ret i32 %ret
+}
+
+; NOTE: creates one extra instruction, but this seems intentional.
+define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
+; CHECK-LABEL: @positive_biggerLshr_multiuse_extrainstr(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 5
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[TMP1]], 4194303
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, 5
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, 10
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Constant Non-Splat Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerShl_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
+ %ret = lshr <2 x i32> %tmp0, <i32 5, i32 10>
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT: [[RET:%.*]] = lshr <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %tmp0 = shl <2 x i32> %x, <i32 5, i32 10>
+ %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+ ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_twovars(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = lshr i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, %y
+  %ret = lshr i32 %tmp0, %z ; %z, not %y
+ ret i32 %ret
+}
+
+declare void @use32(i32)
+
+; One use only.
+define i32 @negative_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_oneuse(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[TMP0]])
+; CHECK-NEXT: [[RET:%.*]] = lshr i32 [[TMP0]], [[Y]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %tmp0 = shl i32 %x, %y
+ call void @use32(i32 %tmp0)
+ %ret = lshr i32 %tmp0, %y
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
new file mode 100644
index 00000000000..90d19be434f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38149
+
+; Pattern:
+; ((%x << MaskedBits) a>> MaskedBits) != %x
+; Should be transformed into:
+; (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
+; Where KeptBits = bitwidth(%x) - MaskedBits
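+;
+; Illustrative worked example (not part of the autogenerated checks): for i8
+; with MaskedBits = 5 and KeptBits = 3, the original pattern tests whether %x
+; does not fit in 3 signed bits, i.e. whether %x lies outside [-4, 3].
+;   %x = 3:  (3 << 5) a>> 5 = 3  == %x -> false;   (3 + 4) u>= 8 -> false
+;   %x = 4:  (4 << 5) a>> 5 = -4 != %x -> true;    (4 + 4) u>= 8 -> true
+; The generated checks use 'icmp ugt ..., 7', which is the same as u>= 8.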
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
+define i1 @pb(i65 %x) {
+; CHECK-LABEL: @pb(
+; CHECK-NEXT: [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i65 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i65 %x, 1
+ %tmp1 = ashr exact i65 %tmp0, 1
+ %tmp2 = icmp ne i65 %x, %tmp1
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 5>
+ %tmp2 = icmp ne <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 6>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 6>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 6>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 6>
+ %tmp2 = icmp ne <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 5, i8 5>
+ %tmp2 = icmp ne <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p4_vec_undef1(<3 x i8> %x) {
+; CHECK-LABEL: @p4_vec_undef1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 5, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+ %tmp2 = icmp ne <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p5_vec_undef2(<3 x i8> %x) {
+; CHECK-LABEL: @p5_vec_undef2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[TMP2]]
+;
+ %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+ %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+ %tmp2 = icmp ne <3 x i8> %tmp1, %x
+ ret <3 x i1> %tmp2
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %x = call i8 @gen8()
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp ne i8 %x, %tmp1 ; swapped order
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @n_oneuse0(i8 %x) {
+; CHECK-LABEL: @n_oneuse0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ call void @use8(i8 %tmp0)
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n_oneuse1(i8 %x) {
+; CHECK-LABEL: @n_oneuse1(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ call void @use8(i8 %tmp1)
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n_oneuse2(i8 %x) {
+; CHECK-LABEL: @n_oneuse2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: call void @use8(i8 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ call void @use8(i8 %tmp0)
+ %tmp1 = ashr exact i8 %tmp0, 5
+ call void @use8(i8 %tmp1)
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 3 ; not 5
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n1(i8 %x) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 7
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = lshr exact i8 %tmp0, 5 ; not ashr
+ %tmp2 = icmp ne i8 %tmp1, %x
+ ret i1 %tmp2
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp0 = shl i8 %x, 5
+ %tmp1 = ashr exact i8 %tmp0, 5
+ %tmp2 = icmp ne i8 %tmp1, %y ; not %x
+ ret i1 %tmp2
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+ %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 3> ; 3 instead of 5
+ %tmp2 = icmp ne <2 x i8> %tmp1, %x
+ ret <2 x i1> %tmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/canonicalize_branch.ll b/llvm/test/Transforms/InstCombine/canonicalize_branch.ll
new file mode 100644
index 00000000000..401490879e9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -0,0 +1,500 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test an already canonical branch to make sure we don't flip it.
+define i32 @eq(i32 %X, i32 %Y) {
+; CHECK-LABEL: @eq(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !0
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp eq i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !0
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @ne(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ne(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !1
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp ne i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !1
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @ugt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ugt(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !2
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp ugt i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !2
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @uge(i32 %X, i32 %Y) {
+; CHECK-LABEL: @uge(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !3
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp uge i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !3
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @ult(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ult(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !4
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp ult i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !4
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @ule(i32 %X, i32 %Y) {
+; CHECK-LABEL: @ule(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !5
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp ule i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !5
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @sgt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sgt(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !6
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp sgt i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !6
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @sge(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sge(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !7
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp sge i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !7
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @slt(i32 %X, i32 %Y) {
+; CHECK-LABEL: @slt(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !8
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp slt i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !8
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @sle(i32 %X, i32 %Y) {
+; CHECK-LABEL: @sle(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !9
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = icmp sle i32 %X, %Y
+ br i1 %C, label %T, label %F, !prof !9
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_false(float %X, float %Y) {
+; CHECK-LABEL: @f_false(
+; CHECK-NEXT: br i1 false, label [[T:%.*]], label [[F:%.*]], !prof !10
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp false float %X, %Y
+ br i1 %C, label %T, label %F, !prof !10
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_oeq(float %X, float %Y) {
+; CHECK-LABEL: @f_oeq(
+; CHECK-NEXT: [[C:%.*]] = fcmp oeq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !11
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp oeq float %X, %Y
+ br i1 %C, label %T, label %F, !prof !11
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ogt(float %X, float %Y) {
+; CHECK-LABEL: @f_ogt(
+; CHECK-NEXT: [[C:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !12
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ogt float %X, %Y
+ br i1 %C, label %T, label %F, !prof !12
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_oge(float %X, float %Y) {
+; CHECK-LABEL: @f_oge(
+; CHECK-NEXT: [[C:%.*]] = fcmp ult float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !13
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp oge float %X, %Y
+ br i1 %C, label %T, label %F, !prof !13
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_olt(float %X, float %Y) {
+; CHECK-LABEL: @f_olt(
+; CHECK-NEXT: [[C:%.*]] = fcmp olt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !14
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp olt float %X, %Y
+ br i1 %C, label %T, label %F, !prof !14
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ole(float %X, float %Y) {
+; CHECK-LABEL: @f_ole(
+; CHECK-NEXT: [[C:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !15
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ole float %X, %Y
+ br i1 %C, label %T, label %F, !prof !15
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_one(float %X, float %Y) {
+; CHECK-LABEL: @f_one(
+; CHECK-NEXT: [[C:%.*]] = fcmp ueq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[F:%.*]], label [[T:%.*]], !prof !16
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp one float %X, %Y
+ br i1 %C, label %T, label %F, !prof !16
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ord(float %X, float %Y) {
+; CHECK-LABEL: @f_ord(
+; CHECK-NEXT: [[C:%.*]] = fcmp ord float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !17
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ord float %X, %Y
+ br i1 %C, label %T, label %F, !prof !17
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_uno(float %X, float %Y) {
+; CHECK-LABEL: @f_uno(
+; CHECK-NEXT: [[C:%.*]] = fcmp uno float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !18
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp uno float %X, %Y
+ br i1 %C, label %T, label %F, !prof !18
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ueq(float %X, float %Y) {
+; CHECK-LABEL: @f_ueq(
+; CHECK-NEXT: [[C:%.*]] = fcmp ueq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !19
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ueq float %X, %Y
+ br i1 %C, label %T, label %F, !prof !19
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ugt(float %X, float %Y) {
+; CHECK-LABEL: @f_ugt(
+; CHECK-NEXT: [[C:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !20
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ugt float %X, %Y
+ br i1 %C, label %T, label %F, !prof !20
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_uge(float %X, float %Y) {
+; CHECK-LABEL: @f_uge(
+; CHECK-NEXT: [[C:%.*]] = fcmp uge float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !21
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp uge float %X, %Y
+ br i1 %C, label %T, label %F, !prof !21
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ult(float %X, float %Y) {
+; CHECK-LABEL: @f_ult(
+; CHECK-NEXT: [[C:%.*]] = fcmp ult float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !22
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ult float %X, %Y
+ br i1 %C, label %T, label %F, !prof !22
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_ule(float %X, float %Y) {
+; CHECK-LABEL: @f_ule(
+; CHECK-NEXT: [[C:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !23
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp ule float %X, %Y
+ br i1 %C, label %T, label %F, !prof !23
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_une(float %X, float %Y) {
+; CHECK-LABEL: @f_une(
+; CHECK-NEXT: [[C:%.*]] = fcmp une float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]], !prof !24
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp une float %X, %Y
+ br i1 %C, label %T, label %F, !prof !24
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+define i32 @f_true(float %X, float %Y) {
+; CHECK-LABEL: @f_true(
+; CHECK-NEXT: br i1 true, label [[T:%.*]], label [[F:%.*]], !prof !25
+; CHECK: T:
+; CHECK-NEXT: ret i32 12
+; CHECK: F:
+; CHECK-NEXT: ret i32 123
+;
+ %C = fcmp true float %X, %Y
+ br i1 %C, label %T, label %F, !prof !25
+T:
+ ret i32 12
+F:
+ ret i32 123
+}
+
+
+!0 = !{!"branch_weights", i32 0, i32 99}
+!1 = !{!"branch_weights", i32 1, i32 99}
+!2 = !{!"branch_weights", i32 2, i32 99}
+!3 = !{!"branch_weights", i32 3, i32 99}
+!4 = !{!"branch_weights", i32 4, i32 99}
+!5 = !{!"branch_weights", i32 5, i32 99}
+!6 = !{!"branch_weights", i32 6, i32 99}
+!7 = !{!"branch_weights", i32 7, i32 99}
+!8 = !{!"branch_weights", i32 8, i32 99}
+!9 = !{!"branch_weights", i32 9, i32 99}
+!10 = !{!"branch_weights", i32 10, i32 99}
+!11 = !{!"branch_weights", i32 11, i32 99}
+!12 = !{!"branch_weights", i32 12, i32 99}
+!13 = !{!"branch_weights", i32 13, i32 99}
+!14 = !{!"branch_weights", i32 14, i32 99}
+!15 = !{!"branch_weights", i32 15, i32 99}
+!16 = !{!"branch_weights", i32 16, i32 99}
+!17 = !{!"branch_weights", i32 17, i32 99}
+!18 = !{!"branch_weights", i32 18, i32 99}
+!19 = !{!"branch_weights", i32 19, i32 99}
+!20 = !{!"branch_weights", i32 20, i32 99}
+!21 = !{!"branch_weights", i32 21, i32 99}
+!22 = !{!"branch_weights", i32 22, i32 99}
+!23 = !{!"branch_weights", i32 23, i32 99}
+!24 = !{!"branch_weights", i32 24, i32 99}
+!25 = !{!"branch_weights", i32 25, i32 99}
+
+; Ensure that the branch metadata is reversed to match the reversals above.
+; CHECK: !0 = {{.*}} i32 0, i32 99}
+; CHECK: !1 = {{.*}} i32 99, i32 1}
+; CHECK: !2 = {{.*}} i32 2, i32 99}
+; CHECK: !3 = {{.*}} i32 99, i32 3}
+; CHECK: !4 = {{.*}} i32 4, i32 99}
+; CHECK: !5 = {{.*}} i32 99, i32 5}
+; CHECK: !6 = {{.*}} i32 6, i32 99}
+; CHECK: !7 = {{.*}} i32 99, i32 7}
+; CHECK: !8 = {{.*}} i32 8, i32 99}
+; CHECK: !9 = {{.*}} i32 99, i32 9}
+; CHECK: !10 = {{.*}} i32 10, i32 99}
+; CHECK: !11 = {{.*}} i32 11, i32 99}
+; CHECK: !12 = {{.*}} i32 12, i32 99}
+; CHECK: !13 = {{.*}} i32 99, i32 13}
+; CHECK: !14 = {{.*}} i32 14, i32 99}
+; CHECK: !15 = {{.*}} i32 99, i32 15}
+; CHECK: !16 = {{.*}} i32 99, i32 16}
+; CHECK: !17 = {{.*}} i32 17, i32 99}
+; CHECK: !18 = {{.*}} i32 18, i32 99}
+; CHECK: !19 = {{.*}} i32 19, i32 99}
+; CHECK: !20 = {{.*}} i32 20, i32 99}
+; CHECK: !21 = {{.*}} i32 21, i32 99}
+; CHECK: !22 = {{.*}} i32 22, i32 99}
+; CHECK: !23 = {{.*}} i32 23, i32 99}
+; CHECK: !24 = {{.*}} i32 24, i32 99}
+; CHECK: !25 = {{.*}} i32 25, i32 99}
+
diff --git a/llvm/test/Transforms/InstCombine/cast-call-combine-prof.ll b/llvm/test/Transforms/InstCombine/cast-call-combine-prof.ll
new file mode 100644
index 00000000000..510473eb37d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-call-combine-prof.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -instcombine < %s | FileCheck -enable-var-scope %s
+
+; Check that instcombine preserves !prof metadata when removing function
+; prototype casts.
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare void @foo(i16* %a)
+
+; CHECK-LABEL: @test_call()
+; CHECK: call void @foo(i16* null), !prof ![[$PROF:[0-9]+]]
+define void @test_call() {
+ call void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null), !prof !0
+ ret void
+}
+
+; CHECK-LABEL: @test_invoke()
+; CHECK: invoke void @foo(i16* null)
+; CHECK-NEXT: to label %done unwind label %lpad, !prof ![[$PROF]]
+define void @test_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+ invoke void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null)
+ to label %done unwind label %lpad, !prof !0
+
+done:
+ ret void
+
+lpad:
+ %lp = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %ehptr = extractvalue { i8*, i32 } %lp, 0
+ tail call void @__cxa_call_unexpected(i8* %ehptr) noreturn nounwind
+ unreachable
+}
+
+; CHECK: ![[$PROF]] = !{!"branch_weights", i32 2000}
+!0 = !{!"VP", i32 0, i64 2000, i64 -3913987384944532146, i64 2000}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/llvm/test/Transforms/InstCombine/cast-call-combine.ll b/llvm/test/Transforms/InstCombine/cast-call-combine.ll
new file mode 100644
index 00000000000..be70a8763ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-call-combine.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -always-inline -instcombine -S | FileCheck %s
+
+define internal void @foo(i16*) alwaysinline {
+ ret void
+}
+
+define void @bar() noinline noreturn {
+ unreachable
+}
+
+define void @test() {
+ br i1 false, label %then, label %else
+
+then:
+ call void @bar()
+ unreachable
+
+else:
+ ; CHECK-NOT: call
+ call void bitcast (void (i16*)* @foo to void (i8*)*) (i8* null)
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll b/llvm/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
new file mode 100644
index 00000000000..0f8601b855c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @foo(i32)
+
+define void @g() {
+; CHECK-LABEL: @g(
+ entry:
+; CHECK: call void @foo(i32 0) [ "deopt"() ]
+ call void bitcast (void (i32)* @foo to void ()*) () [ "deopt"() ]
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
new file mode 100644
index 00000000000..854c1069eb5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -0,0 +1,511 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @i32_cast_cmp_oeq_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_n0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_n0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp one float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_int_n0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp one float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp one float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_int_n0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp one float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ueq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_int_n0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ueq float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ueq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_int_n0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ueq float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp une float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_int_n0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp une float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp une float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_int_n0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp une float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ogt_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ogt float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ogt_int_n0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ogt float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ogt_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ogt float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ogt_int_n0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ogt float %f, -0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ole_int_0_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ole float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ole_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ole float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_olt_int_0_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_olt_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp olt float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_oeq_int_0_uitofp(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i64 %i to float
+ %cmp = fcmp oeq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_oeq_int_0_sitofp(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %cmp = fcmp oeq float %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_oeq_int_0_uitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i64 %i to half
+ %cmp = fcmp oeq half %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_oeq_int_0_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %cmp = fcmp oeq half %f, 0.0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(
+; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[I:%.*]] to ppc_fp128
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq ppc_fp128 [[F]], 0xM00000000000000000000000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to ppc_fp128
+ %cmp = fcmp oeq ppc_fp128 %f, 0xM00000000000000000000000000000000
+ ret i1 %cmp
+}
+
+; Since 0xFFFFFF fits in a float, and one less and
+; one more than it also fit without rounding, the
+; test can be optimized to an integer compare.
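+;
+; Illustrative note (not part of the autogenerated checks): 0xFFFFFF is
+; 16777215 = 2^24 - 1, and a float carries 24 significand bits, so 16777214,
+; 16777215 and 16777216 all convert exactly; the constant 0x416FFFFFE0000000
+; below is the double encoding of 16777215.0, hence 'icmp eq i32 %i, 16777215'
+; is an exact replacement.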
+
+define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 16777215
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_i24max_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_sitofp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 16777215
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+ ret i1 %cmp
+}
+
+; Though 0x1000000 fits in a float, one more than it
+; would round to it too, hence a single integer comparison
+; does not suffice.
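+;
+; Illustrative note (not part of the autogenerated checks): the constant
+; 0x4170000000000000 below is 16777216.0 = 2^24; converting i32 16777217 to
+; float also rounds (ties-to-even) to 16777216.0, so more than one integer
+; maps to this value and a single 'icmp eq' cannot replace the fcmp.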
+
+
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x4170000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x4170000000000000
+ ret i1 %cmp
+}
+
+
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_sitofp(
+; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x4170000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x4170000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_i32umax_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x41F0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x41F0000000000000
+ ret i1 %cmp
+}
+
+; 32-bit unsigned integer cannot possibly round up to 1<<33
+define i1 @i32_cast_cmp_oeq_int_big_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_big_uitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x4200000000000000
+ ret i1 %cmp
+}
+
+; 32-bit signed integer cannot possibly round up to 1<<32
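+; (The largest i32 signed value, 2147483647, converts to 2147483648.0 = 2^31,
+; which is still below 2^32, so the compare folds to false.)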
+define i1 @i32_cast_cmp_oeq_int_i32umax_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_sitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x41F0000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_i32imin_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imin_sitofp(
+; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0xC1E0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0xC1E0000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_i32imax_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x41E0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x41E0000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_i32imax_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_sitofp(
+; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x41E0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x41E0000000000000
+ ret i1 %cmp
+}
+
+; 32-bit signed integer cannot possibly round to -1<<32
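+; (The smallest i32 signed value, -2147483648 = -2^31, is exactly representable,
+; and no i32 converts to -2^32, so the compare folds to false.)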
+define i1 @i32_cast_cmp_oeq_int_negi32umax_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_sitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0xC1F0000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_half_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_uitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_half_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_sitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_half_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_half_uitofp(
+; CHECK-NEXT: ret i1 true
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp one float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_one_half_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_one_half_sitofp(
+; CHECK-NEXT: ret i1 true
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp one float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_half_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_uitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp ueq float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ueq_half_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_sitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp ueq float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_half_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_half_uitofp(
+; CHECK-NEXT: ret i1 true
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp une float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_une_half_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_une_half_sitofp(
+; CHECK-NEXT: ret i1 true
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp une float %f, 0.5
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_inf_uitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_uitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_oeq_int_inf_sitofp(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_sitofp(
+; CHECK-NEXT: ret i1 false
+;
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+; An i128 could round to an IEEE single-precision infinity.
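+; (2^128 - 1 exceeds the largest finite float, (2 - 2^-23) * 2^127, so the
+; largest i128 inputs round to +infinity and the compare cannot be folded away.)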
+define i1 @i128_cast_cmp_oeq_int_inf_uitofp(i128 %i) {
+; CHECK-LABEL: @i128_cast_cmp_oeq_int_inf_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp i128 [[I:%.*]] to float
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[F]], 0x7FF0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i128 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll
new file mode 100644
index 00000000000..f18bfe7531c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll
@@ -0,0 +1,709 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This is https://bugs.llvm.org/show_bug.cgi?id=36682
+
+; In *all* of these, sitofp and bitcast should be instcombine'd out.
+; "sle 0" is canonicalized to "slt 1", so we don't test "sle 0" case.
+; "sge 0" is canonicalized to "sgt -1", so we don't test "sge 0" case.
+; "sge 1" is canonicalized to "sgt 0", so we don't test "sge 1" case.
+; "sle -1" is canonicalized to "slt 0", so we don't test "sle -1" case.
+
+define i1 @i32_cast_cmp_eq_int_0_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp eq i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp ne i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_0_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_0_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_1_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_m1_sitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_eq_int_0_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp eq i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp ne i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_0_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_0_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_1_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_m1_sitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_eq_int_0_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp eq i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp ne i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_0_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_0_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_slt_int_1_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_slt_int_1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i32_cast_cmp_sgt_int_m1_sitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_sgt_int_m1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_eq_int_0_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_eq_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp eq i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_ne_int_0_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_ne_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp ne i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_0_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_0_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_1_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_m1_sitofp_float(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_m1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_eq_int_0_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_eq_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp eq i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_ne_int_0_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_ne_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp ne i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_0_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_0_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_1_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_m1_sitofp_double(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_m1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_eq_int_0_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_eq_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp eq i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_ne_int_0_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_ne_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp ne i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_0_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_0_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_slt_int_1_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i64_cast_cmp_sgt_int_m1_sitofp_half(i64 %i) {
+; CHECK-LABEL: @i64_cast_cmp_sgt_int_m1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i64 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_eq_int_0_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_eq_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp eq i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_ne_int_0_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_ne_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp ne i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_0_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_0_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_0_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_1_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp slt i32 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_m1_sitofp_float(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp sgt i32 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_eq_int_0_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_eq_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp eq i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_ne_int_0_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_ne_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp ne i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_0_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_0_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_0_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_1_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp slt i64 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_m1_sitofp_double(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp sgt i64 %b, -1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_eq_int_0_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_eq_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp eq i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_ne_int_0_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_ne_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp ne i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_0_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_0_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_0_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, 0
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_slt_int_1_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_slt_int_1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[I:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp slt i16 %b, 1
+ ret i1 %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_m1_sitofp_half(i16 %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[I:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp i16 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp sgt i16 %b, -1
+ ret i1 %cmp
+}
+
+; Verify that vector types and vector constants including undef elements are transformed too.
+
+define <3 x i1> @i32_cast_cmp_ne_int_0_sitofp_double_vec(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_sitofp_double_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = sitofp <3 x i32> %i to <3 x double>
+ %b = bitcast <3 x double> %f to <3 x i64>
+ %cmp = icmp ne <3 x i64> %b, <i64 0, i64 0, i64 0>
+ ret <3 x i1> %cmp
+}
+
+; TODO: Can we propagate the constant vector with an undef element?
+
+define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_float_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = sitofp <3 x i32> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <3 x i32>
+ %cmp = icmp eq <3 x i32> %b, <i32 0, i32 undef, i32 0>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_undef(<3 x i64> %i) {
+; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], <i64 1, i64 1, i64 1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = sitofp <3 x i64> %i to <3 x half>
+ %b = bitcast <3 x half> %f to <3 x i16>
+ %cmp = icmp slt <3 x i16> %b, <i16 1, i16 undef, i16 1>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_undef(<3 x i16> %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], <i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = sitofp <3 x i16> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <3 x i32>
+ %cmp = icmp sgt <3 x i32> %b, <i32 -1, i32 undef, i32 -1>
+ ret <3 x i1> %cmp
+}
+
+; Verify that the various forms of this transform are not applied when the
+; bitcast changes the number of vector elements:
+; icmp (bitcast ([su]itofp X)), Y -> icmp X, Y
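+; (When the bitcast changes the element count or turns a vector into a scalar,
+; a compared integer lane no longer corresponds to exactly one converted source
+; element, so the icmp cannot be rewritten directly in terms of X.)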
+
+define <6 x i1> @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(<3 x i16> %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(
+; CHECK-NEXT: [[F:%.*]] = sitofp <3 x i16> [[I:%.*]] to <3 x float>
+; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to <6 x i16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: ret <6 x i1> [[CMP]]
+;
+ %f = sitofp <3 x i16> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <6 x i16>
+ %cmp = icmp sgt <6 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <6 x i1> %cmp
+}
+
+define i1 @i16_cast_cmp_sgt_int_m1_bitcast_vector_to_scalar_sitofp(<3 x i16> %i) {
+; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_bitcast_vector_to_scalar_sitofp(
+; CHECK-NEXT: [[F:%.*]] = sitofp <3 x i16> [[I:%.*]] to <3 x float>
+; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to i96
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i96 [[B]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = sitofp <3 x i16> %i to <3 x float>
+ %b = bitcast <3 x float> %f to i96
+ %cmp = icmp sgt i96 %b, -1
+ ret i1 %cmp
+}
+
+
+define <6 x i1> @i16_cast_cmp_eq_int_0_bitcast_vector_num_elements_uitofp(<3 x i16> %i) {
+; CHECK-LABEL: @i16_cast_cmp_eq_int_0_bitcast_vector_num_elements_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp <3 x i16> [[I:%.*]] to <3 x float>
+; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to <6 x i16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <6 x i16> [[B]], zeroinitializer
+; CHECK-NEXT: ret <6 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i16> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <6 x i16>
+ %cmp = icmp eq <6 x i16> %b, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+ ret <6 x i1> %cmp
+}
+
+define i1 @i16_cast_cmp_eq_int_0_bitcast_vector_to_scalar_uitofp(<3 x i16> %i) {
+; CHECK-LABEL: @i16_cast_cmp_eq_int_0_bitcast_vector_to_scalar_uitofp(
+; CHECK-NEXT: [[F:%.*]] = uitofp <3 x i16> [[I:%.*]] to <3 x float>
+; CHECK-NEXT: [[B:%.*]] = bitcast <3 x float> [[F]] to i96
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i96 [[B]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp <3 x i16> %i to <3 x float>
+ %b = bitcast <3 x float> %f to i96
+ %cmp = icmp eq i96 %b, 0
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
new file mode 100644
index 00000000000..c501fd8d04c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt -debugify -instcombine -S < %s | FileCheck %s -check-prefix DBGINFO
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32"
+
+define i32 @mul(i32 %x, i32 %y) {
+; CHECK-LABEL: @mul(
+; CHECK-NEXT: [[C:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[D:%.*]] = and i32 [[C]], 255
+; CHECK-NEXT: ret i32 [[D]]
+
+; Test that when the zext is evaluated in a different type
+; we preserve the debug information in the resulting
+; instruction.
+; DBGINFO-LABEL: @mul(
+; DBGINFO-NEXT: [[C:%.*]] = mul i32 {{.*}}
+; DBGINFO-NEXT: [[D:%.*]] = and i32 {{.*}}
+; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]]
+; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[D]]
+
+ %A = trunc i32 %x to i8
+ %B = trunc i32 %y to i8
+ %C = mul i8 %A, %B
+ %D = zext i8 %C to i32
+ ret i32 %D
+}
+
+define i32 @select1(i1 %cond, i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select1(
+; CHECK-NEXT: [[D:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i32 [[Z:%.*]], i32 [[D]]
+; CHECK-NEXT: [[F:%.*]] = and i32 [[E]], 255
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %A = trunc i32 %x to i8
+ %B = trunc i32 %y to i8
+ %C = trunc i32 %z to i8
+ %D = add i8 %A, %B
+ %E = select i1 %cond, i8 %C, i8 %D
+ %F = zext i8 %E to i32
+ ret i32 %F
+}
+
+define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @select2(
+; CHECK-NEXT: [[D:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[D]]
+; CHECK-NEXT: ret i8 [[E]]
+;
+ %A = zext i8 %x to i32
+ %B = zext i8 %y to i32
+ %C = zext i8 %z to i32
+ %D = add i32 %A, %B
+ %E = select i1 %cond, i32 %C, i32 %D
+ %F = trunc i32 %E to i8
+ ret i8 %F
+}
+
+; The next 3 tests could be handled in instcombine, but evaluating values
+; with multiple uses may be very slow. Let some other pass deal with it.
+
+define i32 @eval_trunc_multi_use_in_one_inst(i32 %x) {
+; CHECK-LABEL: @eval_trunc_multi_use_in_one_inst(
+; CHECK-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15
+; CHECK-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]]
+; CHECK-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32
+; CHECK-NEXT: ret i32 [[T]]
+;
+ %z = zext i32 %x to i64
+ %a = add nsw nuw i64 %z, 15
+ %m = mul i64 %a, %a
+ %t = trunc i64 %m to i32
+ ret i32 %t
+}
+
+define i32 @eval_zext_multi_use_in_one_inst(i32 %x) {
+; CHECK-LABEL: @eval_zext_multi_use_in_one_inst(
+; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT: [[A:%.*]] = and i16 [[T]], 5
+; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]]
+; CHECK-NEXT: [[R:%.*]] = zext i16 [[M]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %t = trunc i32 %x to i16
+ %a = and i16 %t, 5
+ %m = mul nuw nsw i16 %a, %a
+ %r = zext i16 %m to i32
+ ret i32 %r
+}
+
+define i32 @eval_sext_multi_use_in_one_inst(i32 %x) {
+; CHECK-LABEL: @eval_sext_multi_use_in_one_inst(
+; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT: [[A:%.*]] = and i16 [[T]], 14
+; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]]
+; CHECK-NEXT: [[O:%.*]] = or i16 [[M]], -32768
+; CHECK-NEXT: [[R:%.*]] = sext i16 [[O]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %t = trunc i32 %x to i16
+ %a = and i16 %t, 14
+ %m = mul nuw nsw i16 %a, %a
+ %o = or i16 %m, 32768
+ %r = sext i16 %o to i32
+ ret i32 %r
+}
+
+; If we have a transform to shrink the above 3 cases, make sure it doesn't
+; also try to look through the multiple uses in this test and crash.
+
+define void @PR36225(i32 %a, i32 %b) {
+; CHECK-LABEL: @PR36225(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: br i1 undef, label [[FOR_BODY3_US:%.*]], label [[FOR_BODY3:%.*]]
+; CHECK: for.body3.us:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i8 0, i8 4
+; CHECK-NEXT: switch i3 undef, label [[EXIT:%.*]] [
+; CHECK-NEXT: i3 0, label [[FOR_END:%.*]]
+; CHECK-NEXT: i3 -1, label [[FOR_END]]
+; CHECK-NEXT: ]
+; CHECK: for.body3:
+; CHECK-NEXT: switch i3 undef, label [[EXIT]] [
+; CHECK-NEXT: i3 0, label [[FOR_END]]
+; CHECK-NEXT: i3 -1, label [[FOR_END]]
+; CHECK-NEXT: ]
+; CHECK: for.end:
+; CHECK-NEXT: [[H:%.*]] = phi i8 [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ 0, [[FOR_BODY3]] ], [ 0, [[FOR_BODY3]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[H]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[A:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[EXIT2:%.*]]
+; CHECK: exit2:
+; CHECK-NEXT: unreachable
+; CHECK: exit:
+; CHECK-NEXT: unreachable
+;
+entry:
+ br label %while.body
+
+while.body:
+ %tobool = icmp eq i32 %b, 0
+ br i1 undef, label %for.body3.us, label %for.body3
+
+for.body3.us:
+ %spec.select = select i1 %tobool, i8 0, i8 4
+ switch i3 undef, label %exit [
+ i3 0, label %for.end
+ i3 -1, label %for.end
+ ]
+
+for.body3:
+ switch i3 undef, label %exit [
+ i3 0, label %for.end
+ i3 -1, label %for.end
+ ]
+
+for.end:
+ %h = phi i8 [ %spec.select, %for.body3.us ], [ %spec.select, %for.body3.us ], [ 0, %for.body3 ], [ 0, %for.body3 ]
+ %conv = sext i8 %h to i32
+ %cmp = icmp sgt i32 %a, %conv
+ br i1 %cmp, label %exit, label %exit2
+
+exit2:
+ unreachable
+
+exit:
+ unreachable
+}
+
+; Check that we don't drop debug info when a zext is removed.
+define i1 @foo(i1 zeroext %b) {
+; DBGINFO-LABEL: @foo(
+; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 %b
+; DBGINFO-NEXT: ret i1 %b
+
+ %frombool = zext i1 %b to i8
+ ret i1 %b
+}
diff --git a/llvm/test/Transforms/InstCombine/cast-select.ll b/llvm/test/Transforms/InstCombine/cast-select.ll
new file mode 100644
index 00000000000..189c6c33a70
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-select.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @zext(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @zext(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i32 [[SEL]] to i64
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %cmp = icmp eq i32 %x, %y
+ %sel = select i1 %cmp, i32 0, i32 %z
+ %r = zext i32 %sel to i64
+ ret i64 %r
+}
+
+define <2 x i32> @zext_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @zext_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[Z:%.*]], <2 x i8> <i8 42, i8 7>
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i8> [[SEL]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %cmp = icmp ugt <2 x i8> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x i8> %z, <2 x i8> <i8 42, i8 7>
+ %r = zext <2 x i8> %sel to <2 x i32>
+ ret <2 x i32> %r
+}
+
+define i64 @sext(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 42, i8 [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i8 [[SEL]] to i64
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %cmp = icmp ult i8 %x, %y
+ %sel = select i1 %cmp, i8 42, i8 %z
+ %r = sext i8 %sel to i64
+ ret i64 %r
+}
+
+define <2 x i32> @sext_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[Z:%.*]], <2 x i8> <i8 42, i8 7>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[SEL]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %cmp = icmp ugt <2 x i8> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x i8> %z, <2 x i8> <i8 42, i8 7>
+ %r = sext <2 x i8> %sel to <2 x i32>
+ ret <2 x i32> %r
+}
+
+define i16 @trunc(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @trunc(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 42, i32 [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %cmp = icmp ult i32 %x, %y
+ %sel = select i1 %cmp, i32 42, i32 %z
+ %r = trunc i32 %sel to i16
+ ret i16 %r
+}
+
+define <2 x i32> @trunc_vec(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: @trunc_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[Z:%.*]], <2 x i64> <i64 42, i64 7>
+; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[SEL]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %cmp = icmp ugt <2 x i64> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x i64> %z, <2 x i64> <i64 42, i64 7>
+ %r = trunc <2 x i64> %sel to <2 x i32>
+ ret <2 x i32> %r
+}
+
+define double @fpext(float %x, float %y, float %z) {
+; CHECK-LABEL: @fpext(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float 1.700000e+01, float [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fpext float [[SEL]] to double
+; CHECK-NEXT: ret double [[R]]
+;
+ %cmp = fcmp oeq float %x, %y
+ %sel = select i1 %cmp, float 17.0, float %z
+ %r = fpext float %sel to double
+ ret double %r
+}
+
+define <2 x double> @fpext_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fpext_vec(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x float> [[Z:%.*]], <2 x float> <float 4.200000e+01, float -2.000000e+00>
+; CHECK-NEXT: [[R:%.*]] = fpext <2 x float> [[SEL]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %cmp = fcmp ugt <2 x float> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x float> %z, <2 x float> <float 42.0, float -2.0>
+ %r = fpext <2 x float> %sel to <2 x double>
+ ret <2 x double> %r
+}
+
+define float @fptrunc(double %x, double %y, double %z) {
+; CHECK-LABEL: @fptrunc(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], double 4.200000e+01, double [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fptrunc double [[SEL]] to float
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp = fcmp ult double %x, %y
+ %sel = select i1 %cmp, double 42.0, double %z
+ %r = fptrunc double %sel to float
+ ret float %r
+}
+
+define <2 x float> @fptrunc_vec(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: @fptrunc_vec(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[Z:%.*]], <2 x double> <double -4.200000e+01, double 1.200000e+01>
+; CHECK-NEXT: [[R:%.*]] = fptrunc <2 x double> [[SEL]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %cmp = fcmp oge <2 x double> %x, %y
+ %sel = select <2 x i1> %cmp, <2 x double> %z, <2 x double> <double -42.0, double 12.0>
+ %r = fptrunc <2 x double> %sel to <2 x float>
+ ret <2 x float> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll
new file mode 100644
index 00000000000..4d780aa9081
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll
@@ -0,0 +1,50 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: define {{.*}} @test5
+define i16 @test5(i16 %A) !dbg !34 {
+ ; CHECK: [[and:%.*]] = and i16 %A, 15
+
+ %B = sext i16 %A to i32, !dbg !40
+ call void @llvm.dbg.value(metadata i32 %B, metadata !36, metadata !DIExpression()), !dbg !40
+
+ %C = and i32 %B, 15, !dbg !41
+ call void @llvm.dbg.value(metadata i32 %C, metadata !37, metadata !DIExpression()), !dbg !41
+
+ ; Preserve the dbg.value for the DCE'd 32-bit 'and'.
+ ;
+ ; The high 16 bits of the original 'and' require sign-extending the new 16-bit and:
+ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[and]], metadata [[C:![0-9]+]],
+ ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)
+
+ %D = trunc i32 %C to i16, !dbg !42
+ call void @llvm.dbg.value(metadata i16 %D, metadata !38, metadata !DIExpression()), !dbg !42
+
+ ; The dbg.value for a truncate should simply point to the result of the 16-bit 'and'.
+ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[and]], metadata [[D:![0-9]+]], metadata !DIExpression())
+
+ ret i16 %D, !dbg !43
+ ; CHECK-NEXT: ret i16 [[and]]
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "void", directory: "/")
+!2 = !{}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !DISubroutineType(types: !2)
+!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_signed)
+!12 = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_signed)
+!34 = distinct !DISubprogram(name: "test5", linkageName: "test5", scope: null, file: !1, line: 12, type: !7, isLocal: false, isDefinition: true, scopeLine: 12, isOptimized: true, unit: !0, retainedNodes: !35)
+!35 = !{!36, !37, !38}
+!36 = !DILocalVariable(name: "B", scope: !34, file: !1, line: 12, type: !10)
+!37 = !DILocalVariable(name: "C", scope: !34, file: !1, line: 13, type: !10)
+!38 = !DILocalVariable(name: "D", scope: !34, file: !1, line: 14, type: !39)
+!39 = !DIBasicType(name: "ty16", size: 16, encoding: DW_ATE_signed)
+!40 = !DILocation(line: 12, column: 1, scope: !34)
+!41 = !DILocation(line: 13, column: 1, scope: !34)
+!42 = !DILocation(line: 14, column: 1, scope: !34)
+!43 = !DILocation(line: 15, column: 1, scope: !34)
diff --git a/llvm/test/Transforms/InstCombine/cast-set.ll b/llvm/test/Transforms/InstCombine/cast-set.ll
new file mode 100644
index 00000000000..6da6dc3236c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-set.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i1 @test1(i32 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %X, 12
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = bitcast i32 %X to i32
+ ; Convert to setne int %X, 12
+ %c = icmp ne i32 %A, 12
+ ret i1 %c
+}
+
+define i1 @test2(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %X, %Y
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = bitcast i32 %X to i32
+ %B = bitcast i32 %Y to i32
+ ; Convert to setne int %X, %Y
+ %c = icmp ne i32 %A, %B
+ ret i1 %c
+}
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[C:%.*]] = shl i32 %A, 2
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = bitcast i32 %A to i32
+ %C = shl i32 %B, 2
+ %D = bitcast i32 %C to i32
+ ret i32 %D
+}
+
+define i16 @test5(i16 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[C:%.*]] = and i16 %A, 15
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %B = sext i16 %A to i32
+ %C = and i32 %B, 15
+ %D = trunc i32 %C to i16
+ ret i16 %D
+}
+
+define i1 @test6(i1 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i1 %A
+;
+ %B = zext i1 %A to i32
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
+
+define i1 @test6a(i1 %A) {
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT: ret i1 true
+;
+ %B = zext i1 %A to i32
+ %C = icmp ne i32 %B, -1
+ ret i1 %C
+}
+
+define i1 @test7(i8* %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8* %A, null
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = bitcast i8* %A to i32*
+ %C = icmp eq i32* %B, null
+ ret i1 %C
+}
diff --git a/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll b/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll
new file mode 100644
index 00000000000..e1fa27256b1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This is related to https://bugs.llvm.org/show_bug.cgi?id=36682
+
+; In *all* of these, uitofp and bitcast should be instcombine'd out.
+
+define i1 @i32_cast_cmp_eq_int_0_uitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp eq i32 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_float_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_float_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x float>
+ %b = bitcast <2 x float> %f to <2 x i32>
+ %cmp = icmp eq <2 x i32> %b, <i32 0, i32 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_float_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_float_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <3 x i32>
+ %cmp = icmp eq <3 x i32> %b, <i32 0, i32 undef, i32 0>
+ ret <3 x i1> %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_uitofp_float(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_float(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to float
+ %b = bitcast float %f to i32
+ %cmp = icmp ne i32 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_float_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_float_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x float>
+ %b = bitcast <2 x float> %f to <2 x i32>
+ %cmp = icmp ne <2 x i32> %b, <i32 0, i32 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_float_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_float_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x float>
+ %b = bitcast <3 x float> %f to <3 x i32>
+ %cmp = icmp ne <3 x i32> %b, <i32 0, i32 undef, i32 0>
+ ret <3 x i1> %cmp
+}
+
+define i1 @i32_cast_cmp_eq_int_0_uitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp eq i64 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_double_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_double_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x double>
+ %b = bitcast <2 x double> %f to <2 x i64>
+ %cmp = icmp eq <2 x i64> %b, <i64 0, i64 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_double_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_double_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x double>
+ %b = bitcast <3 x double> %f to <3 x i64>
+ %cmp = icmp eq <3 x i64> %b, <i64 0, i64 undef, i64 0>
+ ret <3 x i1> %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_uitofp_double(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_double(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to double
+ %b = bitcast double %f to i64
+ %cmp = icmp ne i64 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_double_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_double_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x double>
+ %b = bitcast <2 x double> %f to <2 x i64>
+ %cmp = icmp ne <2 x i64> %b, <i64 0, i64 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_double_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_double_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x double>
+ %b = bitcast <3 x double> %f to <3 x i64>
+ %cmp = icmp ne <3 x i64> %b, <i64 0, i64 undef, i64 0>
+ ret <3 x i1> %cmp
+}
+
+define i1 @i32_cast_cmp_eq_int_0_uitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp eq i16 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_half_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_half_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x half>
+ %b = bitcast <2 x half> %f to <2 x i16>
+ %cmp = icmp eq <2 x i16> %b, <i16 0, i16 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_half_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_half_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x half>
+ %b = bitcast <3 x half> %f to <3 x i16>
+ %cmp = icmp eq <3 x i16> %b, <i16 0, i16 undef, i16 0>
+ ret <3 x i1> %cmp
+}
+
+define i1 @i32_cast_cmp_ne_int_0_uitofp_half(i32 %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_half(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %f = uitofp i32 %i to half
+ %b = bitcast half %f to i16
+ %cmp = icmp ne i16 %b, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_half_vec(<2 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_half_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %f = uitofp <2 x i32> %i to <2 x half>
+ %b = bitcast <2 x half> %f to <2 x i16>
+ %cmp = icmp ne <2 x i16> %b, <i16 0, i16 0>
+ ret <2 x i1> %cmp
+}
+
+define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_half_vec_undef(<3 x i32> %i) {
+; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_half_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %f = uitofp <3 x i32> %i to <3 x half>
+ %b = bitcast <3 x half> %f to <3 x i16>
+ %cmp = icmp ne <3 x i16> %b, <i16 0, i16 undef, i16 0>
+ ret <3 x i1> %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
new file mode 100644
index 00000000000..b6d1eda0601
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -0,0 +1,1561 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Tests to make sure elimination of casts is working correctly
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
+
+@inbuf = external global [32832 x i8]
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %c1 = bitcast i32 %A to i32
+ %c2 = bitcast i32 %c1 to i32
+ ret i32 %c2
+}
+
+define i64 @test2(i8 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[RET:%.*]] = zext i8 [[A:%.*]] to i64
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %c1 = zext i8 %A to i16
+ %c2 = zext i16 %c1 to i32
+ %Ret = zext i32 %c2 to i64
+ ret i64 %Ret
+}
+
+define i64 @test3(i64 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[C2:%.*]] = and i64 [[A:%.*]], 255
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = trunc i64 %A to i8
+ %c2 = zext i8 %c1 to i64
+ ret i64 %c2
+}
+
+define i32 @test4(i32 %A, i32 %B) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[RESULT:%.*]] = zext i1 [[COND]] to i32
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %COND = icmp slt i32 %A, %B
+ %c = zext i1 %COND to i8
+ %result = zext i8 %c to i32
+ ret i32 %result
+}
+
+define i32 @test5(i1 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[RESULT:%.*]] = zext i1 [[B:%.*]] to i32
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %c = zext i1 %B to i8
+ %result = zext i8 %c to i32
+ ret i32 %result
+}
+
+define i32 @test6(i64 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C1:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C1]]
+;
+ %c1 = trunc i64 %A to i32
+ %res = bitcast i32 %c1 to i32
+ ret i32 %res
+}
+
+define i64 @test7(i1 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[RES:%.*]] = zext i1 [[A:%.*]] to i64
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %c1 = zext i1 %A to i32
+ %res = sext i32 %c1 to i64
+ ret i64 %res
+}
+
+define i64 @test8(i8 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[C1:%.*]] = sext i8 [[A:%.*]] to i64
+; CHECK-NEXT: ret i64 [[C1]]
+;
+ %c1 = sext i8 %A to i64
+ %res = bitcast i64 %c1 to i64
+ ret i64 %res
+}
+
+define i16 @test9(i16 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i16 [[A:%.*]]
+;
+ %c1 = sext i16 %A to i32
+ %c2 = trunc i32 %c1 to i16
+ ret i16 %c2
+}
+
+define i16 @test10(i16 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i16 [[A:%.*]]
+;
+ %c1 = sext i16 %A to i32
+ %c2 = trunc i32 %c1 to i16
+ ret i16 %c2
+}
+
+declare void @varargs(i32, ...)
+
+define void @test11(i32* %P) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: call void (i32, ...) @varargs(i32 5, i32* [[P:%.*]])
+; CHECK-NEXT: ret void
+;
+ %c = bitcast i32* %P to i16*
+ call void (i32, ...) @varargs( i32 5, i16* %c )
+ ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+define void @test_invoke_vararg_cast(i32* %a, i32* %b) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @test_invoke_vararg_cast(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: invoke void (i32, ...) @varargs(i32 1, i32* [[B:%.*]], i32* [[A:%.*]])
+; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK: invoke.cont:
+; CHECK-NEXT: ret void
+; CHECK: lpad:
+; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = bitcast i32* %b to i8*
+ %1 = bitcast i32* %a to i64*
+ invoke void (i32, ...) @varargs(i32 1, i8* %0, i64* %1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret void
+
+lpad:
+ %2 = landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+define i8* @test13(i64 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[C:%.*]] = getelementptr [32832 x i8], [32832 x i8]* @inbuf, i64 0, i64 [[A:%.*]]
+; CHECK-NEXT: ret i8* [[C]]
+;
+ %c = getelementptr [0 x i8], [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A
+ ret i8* %c
+}
+
+define i1 @test14(i8 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i8 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[X]]
+;
+ %c = bitcast i8 %A to i8
+ %X = icmp ult i8 %c, -128
+ ret i1 %X
+}
+
+
+; This just won't occur when there's no difference between ubyte and sbyte
+;bool %test15(ubyte %A) {
+; %c = cast ubyte %A to sbyte
+; %X = setlt sbyte %c, 0 ; setgt %A, 127
+; ret bool %X
+;}
+
+define i1 @test16(i32* %P) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32* [[P:%.*]], null
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = icmp ne i32* %P, null
+ ret i1 %c
+}
+
+define i16 @test17(i1 %x) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[T86:%.*]] = zext i1 [[X:%.*]] to i16
+; CHECK-NEXT: ret i16 [[T86]]
+;
+ %c = zext i1 %x to i32
+ %t86 = trunc i32 %c to i16
+ ret i16 %t86
+}
+
+define i16 @test18(i8 %x) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[T86:%.*]] = sext i8 [[X:%.*]] to i16
+; CHECK-NEXT: ret i16 [[T86]]
+;
+ %c = sext i8 %x to i32
+ %t86 = trunc i32 %c to i16
+ ret i16 %t86
+}
+
+define i1 @test19(i32 %X) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X:%.*]], 12345
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %c = sext i32 %X to i64
+ %Z = icmp slt i64 %c, 12345
+ ret i1 %Z
+}
+
+define <2 x i1> @test19vec(<2 x i32> %X) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 12345, i32 2147483647>
+; CHECK-NEXT: ret <2 x i1> [[Z]]
+;
+ %c = sext <2 x i32> %X to <2 x i64>
+ %Z = icmp slt <2 x i64> %c, <i64 12345, i64 2147483647>
+ ret <2 x i1> %Z
+}
+
+define <3 x i1> @test19vec2(<3 x i1> %X) {
+; CHECK-LABEL: @test19vec2(
+; CHECK-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], <i1 true, i1 true, i1 true>
+; CHECK-NEXT: ret <3 x i1> [[CMPEQ]]
+;
+ %sext = sext <3 x i1> %X to <3 x i32>
+ %cmpeq = icmp eq <3 x i32> %sext, zeroinitializer
+ ret <3 x i1> %cmpeq
+}
+
+define i1 @test20(i1 %B) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i1 false
+;
+ %c = zext i1 %B to i32
+ %D = icmp slt i32 %c, -1
+ ret i1 %D
+}
+
+define i32 @test21(i32 %X) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %c1 = trunc i32 %X to i8
+ %c2 = sext i8 %c1 to i32
+ %RV = and i32 %c2, 255
+ ret i32 %RV
+}
+
+define i32 @test22(i32 %X) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
+ %c1 = trunc i32 %X to i8
+ %c2 = sext i8 %c1 to i32
+ %RV = shl i32 %c2, 24
+ ret i32 %RV
+}
+
+define i32 @test23(i32 %X) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C2:%.*]] = and i32 [[X:%.*]], 65535
+; CHECK-NEXT: ret i32 [[C2]]
+;
+ %c1 = trunc i32 %X to i16
+ %c2 = zext i16 %c1 to i32
+ ret i32 %c2
+}
+
+define i1 @test24(i1 %C) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: ret i1 true
+;
+ %X = select i1 %C, i32 14, i32 1234
+ %c = icmp ne i32 %X, 0
+ ret i1 %c
+}
+
+define i32 @test26(float %F) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[D:%.*]] = fptosi float [[F:%.*]] to i32
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %c = fpext float %F to double
+ %D = fptosi double %c to i32
+ ret i32 %D
+}
+
+define [4 x float]* @test27([9 x [4 x float]]* %A) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [9 x [4 x float]], [9 x [4 x float]]* [[A:%.*]], i64 0, i64 0
+; CHECK-NEXT: ret [4 x float]* [[C]]
+;
+ %c = bitcast [9 x [4 x float]]* %A to [4 x float]*
+ ret [4 x float]* %c
+}
+
+define float* @test28([4 x float]* %A) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[A:%.*]], i64 0, i64 0
+; CHECK-NEXT: ret float* [[C]]
+;
+ %c = bitcast [4 x float]* %A to float*
+ ret float* %c
+}
+
+define i32 @test29(i32 %c1, i32 %c2) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[T21:%.*]] = or i32 [[C2:%.*]], [[C1:%.*]]
+; CHECK-NEXT: [[T10:%.*]] = and i32 [[T21]], 255
+; CHECK-NEXT: ret i32 [[T10]]
+;
+ %t1 = trunc i32 %c1 to i8
+ %tmask = trunc i32 %c2 to i8
+ %t2 = or i8 %tmask, %t1
+ %t10 = zext i8 %t2 to i32
+ ret i32 %t10
+}
+
+define i32 @test30(i32 %c1) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C3:%.*]] = and i32 [[C1:%.*]], 255
+; CHECK-NEXT: [[C4:%.*]] = xor i32 [[C3]], 1
+; CHECK-NEXT: ret i32 [[C4]]
+;
+ %c2 = trunc i32 %c1 to i8
+ %c3 = xor i8 %c2, 1
+ %c4 = zext i8 %c3 to i32
+ ret i32 %c4
+}
+
+define i1 @test31(i64 %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[C1:%.*]] = and i64 [[A:%.*]], 42
+; CHECK-NEXT: [[D:%.*]] = icmp eq i64 [[C1]], 10
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, 42
+ %D = icmp eq i32 %C, 10
+ ret i1 %D
+}
+
+; FIXME: Vectors should fold too...or not?
+; Does this depend on whether the source/dest types of the trunc are legal in the data layout?
+define <2 x i1> @test31vec(<2 x i64> %A) {
+; CHECK-LABEL: @test31vec(
+; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[B]], <i32 42, i32 42>
+; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], <i32 10, i32 10>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = trunc <2 x i64> %A to <2 x i32>
+ %C = and <2 x i32> %B, <i32 42, i32 42>
+ %D = icmp eq <2 x i32> %C, <i32 10, i32 10>
+ ret <2 x i1> %D
+}
+
+; Verify that the 'and' was narrowed, the zext was eliminated, and the compare was narrowed
+; even for vectors. Earlier folds should ensure that the icmp(and(zext)) pattern never occurs.
+
+define <2 x i1> @test32vec(<2 x i8> %A) {
+; CHECK-LABEL: @test32vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], <i8 42, i8 42>
+; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 10, i8 10>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = zext <2 x i8> %A to <2 x i16>
+ %C = and <2 x i16> %B, <i16 42, i16 42>
+ %D = icmp eq <2 x i16> %C, <i16 10, i16 10>
+ ret <2 x i1> %D
+}
+
+define i32 @test33(i32 %c1) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: ret i32 [[C1:%.*]]
+;
+ %x = bitcast i32 %c1 to float
+ %y = bitcast float %x to i32
+ ret i32 %y
+}
+
+define i16 @test34(i16 %a) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 [[A:%.*]], 8
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %c1 = zext i16 %a to i32
+ %t21 = lshr i32 %c1, 8
+ %c2 = trunc i32 %t21 to i16
+ ret i16 %c2
+}
+
+define i16 @test35(i16 %a) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[T2:%.*]] = lshr i16 [[A:%.*]], 8
+; CHECK-NEXT: ret i16 [[T2]]
+;
+ %c1 = bitcast i16 %a to i16
+ %t2 = lshr i16 %c1, 8
+ %c2 = bitcast i16 %t2 to i16
+ ret i16 %c2
+}
+
+; rdar://6480391
+define i1 @test36(i32 %a) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[D:%.*]] = icmp sgt i32 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %b = lshr i32 %a, 31
+ %c = trunc i32 %b to i8
+ %d = icmp eq i8 %c, 0
+ ret i1 %d
+}
+
+define <2 x i1> @test36vec(<2 x i32> %a) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %b = lshr <2 x i32> %a, <i32 31, i32 31>
+ %c = trunc <2 x i32> %b to <2 x i8>
+ %d = icmp eq <2 x i8> %c, zeroinitializer
+ ret <2 x i1> %d
+}
+
+define i1 @test37(i32 %a) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: ret i1 false
+;
+ %b = lshr i32 %a, 31
+ %c = or i32 %b, 512
+ %d = trunc i32 %c to i8
+ %e = icmp eq i8 %d, 11
+ ret i1 %e
+}
+
+define i64 @test38(i32 %a) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp eq i32 %a, -2
+ %2 = zext i1 %1 to i8
+ %3 = xor i8 %2, 1
+ %4 = zext i8 %3 to i64
+ ret i64 %4
+}
+
+define i16 @test39(i16 %a) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
+; CHECK-NEXT: ret i16 [[REV]]
+;
+ %t = zext i16 %a to i32
+ %t21 = lshr i32 %t, 8
+ %t5 = shl i32 %t, 8
+ %t32 = or i32 %t21, %t5
+ %r = trunc i32 %t32 to i16
+ ret i16 %r
+}
+
+define i16 @test40(i16 %a) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[T21:%.*]] = lshr i16 [[A:%.*]], 9
+; CHECK-NEXT: [[T5:%.*]] = shl i16 [[A]], 8
+; CHECK-NEXT: [[T32:%.*]] = or i16 [[T21]], [[T5]]
+; CHECK-NEXT: ret i16 [[T32]]
+;
+ %t = zext i16 %a to i32
+ %t21 = lshr i32 %t, 9
+ %t5 = shl i32 %t, 8
+ %t32 = or i32 %t21, %t5
+ %r = trunc i32 %t32 to i16
+ ret i16 %r
+}
+
+define <2 x i16> @test40vec(<2 x i16> %a) {
+; CHECK-LABEL: @test40vec(
+; CHECK-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 9, i16 9>
+; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], <i16 8, i16 8>
+; CHECK-NEXT: [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]]
+; CHECK-NEXT: ret <2 x i16> [[T32]]
+;
+ %t = zext <2 x i16> %a to <2 x i32>
+ %t21 = lshr <2 x i32> %t, <i32 9, i32 9>
+ %t5 = shl <2 x i32> %t, <i32 8, i32 8>
+ %t32 = or <2 x i32> %t21, %t5
+ %r = trunc <2 x i32> %t32 to <2 x i16>
+ ret <2 x i16> %r
+}
+
+; PR1263
+define i32* @test41(i32* %t1) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i32* [[T1:%.*]]
+;
+ %t64 = bitcast i32* %t1 to { i32 }*
+ %t65 = getelementptr { i32 }, { i32 }* %t64, i32 0, i32 0
+ ret i32* %t65
+}
+
+define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %t1) {
+; CHECK-LABEL: @test41_addrspacecast_smaller(
+; CHECK-NEXT: [[T65:%.*]] = addrspacecast i32* [[T1:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT: ret i32 addrspace(1)* [[T65]]
+;
+ %t64 = addrspacecast i32* %t1 to { i32 } addrspace(1)*
+ %t65 = getelementptr { i32 }, { i32 } addrspace(1)* %t64, i32 0, i32 0
+ ret i32 addrspace(1)* %t65
+}
+
+define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %t1) {
+; CHECK-LABEL: @test41_addrspacecast_larger(
+; CHECK-NEXT: [[T65:%.*]] = addrspacecast i32 addrspace(1)* [[T1:%.*]] to i32*
+; CHECK-NEXT: ret i32* [[T65]]
+;
+ %t64 = addrspacecast i32 addrspace(1)* %t1 to { i32 }*
+ %t65 = getelementptr { i32 }, { i32 }* %t64, i32 0, i32 0
+ ret i32* %t65
+}
+
+define i32 @test42(i32 %X) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = trunc i32 %X to i8
+ %Z = zext i8 %Y to i32
+ ret i32 %Z
+}
+
+; rdar://6598839
+define zeroext i64 @test43(i8 zeroext %on_off) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[A:%.*]] = zext i8 [[ON_OFF:%.*]] to i64
+; CHECK-NEXT: [[B:%.*]] = add nsw i64 [[A]], -1
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = zext i8 %on_off to i32
+ %B = add i32 %A, -1
+ %C = sext i32 %B to i64
+ ret i64 %C ;; Should be (add (zext i8 -> i64), -1)
+}
+
+define i64 @test44(i8 %T) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[A:%.*]] = zext i8 [[T:%.*]] to i64
+; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 1234
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = zext i8 %T to i16
+ %B = or i16 %A, 1234
+ %C = zext i16 %B to i64
+ ret i64 %C
+}
+
+define i64 @test45(i8 %A, i64 %Q) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i64
+; CHECK-NEXT: [[C:%.*]] = or i64 [[B]], [[Q:%.*]]
+; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %D = trunc i64 %Q to i32 ;; should be removed
+ %B = sext i8 %A to i32
+ %C = or i32 %B, %D
+ %E = zext i32 %C to i64
+ ret i64 %E
+}
+
+
+define i64 @test46(i64 %A) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 8
+; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 10752
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, 42
+ %D = shl i32 %C, 8
+ %E = zext i32 %D to i64
+ ret i64 %E
+}
+
+define <2 x i64> @test46vec(<2 x i64> %A) {
+; CHECK-LABEL: @test46vec(
+; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], <i32 8, i32 8>
+; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[C]], <i32 10752, i32 10752>
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[D]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[E]]
+;
+ %B = trunc <2 x i64> %A to <2 x i32>
+ %C = and <2 x i32> %B, <i32 42, i32 42>
+ %D = shl <2 x i32> %C, <i32 8, i32 8>
+ %E = zext <2 x i32> %D to <2 x i64>
+ ret <2 x i64> %E
+}
+
+define i64 @test47(i8 %A) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 42
+; CHECK-NEXT: [[C:%.*]] = sext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = sext i8 %A to i32
+ %C = or i32 %B, 42
+ %E = zext i32 %C to i64
+ ret i64 %E
+}
+
+define i64 @test48(i8 %A1, i8 %a2) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[A1:%.*]] to i32
+; CHECK-NEXT: [[C:%.*]] = shl nuw nsw i32 [[Z2]], 8
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], [[Z2]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[D]] to i64
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %Z1 = zext i8 %a2 to i32
+ %Z2 = zext i8 %A1 to i32
+ %C = shl i32 %Z2, 8
+ %D = or i32 %C, %Z2
+ %E = zext i32 %D to i64
+ ret i64 %E
+}
+
+define i64 @test49(i64 %A) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 32
+; CHECK-NEXT: [[SEXT:%.*]] = ashr exact i64 [[C]], 32
+; CHECK-NEXT: [[D:%.*]] = or i64 [[SEXT]], 1
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i64 %A to i32
+ %C = or i32 %B, 1
+ %D = sext i32 %C to i64
+ ret i64 %D
+}
+
+define i64 @test50(i64 %x) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[A:%.*]] = lshr i64 [[X:%.*]], 2
+; CHECK-NEXT: [[D:%.*]] = shl i64 [[A]], 32
+; CHECK-NEXT: [[SEXT:%.*]] = add i64 [[D]], -4294967296
+; CHECK-NEXT: [[E:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %a = lshr i64 %x, 2
+ %B = trunc i64 %a to i32
+ %D = add i32 %B, -1
+ %E = sext i32 %D to i64
+ ret i64 %E
+; lshr+shl will be handled by DAGCombine.
+}
+
+define i64 @test51(i64 %A, i1 %cond) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[C:%.*]] = and i64 [[A:%.*]], 4294967294
+; CHECK-NEXT: [[D:%.*]] = or i64 [[A]], 1
+; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i64 [[C]], i64 [[D]]
+; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[E]], 32
+; CHECK-NEXT: [[F:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, -2
+ %D = or i32 %B, 1
+ %E = select i1 %cond, i32 %C, i32 %D
+ %F = sext i32 %E to i64
+ ret i64 %F
+}
+
+define i32 @test52(i64 %A) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], 32962
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = trunc i64 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = zext i16 %D to i32
+ ret i32 %E
+}
+
+define i64 @test53(i32 %A) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 32962
+; CHECK-NEXT: [[D:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i32 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = zext i16 %D to i64
+ ret i64 %E
+}
+
+define i32 @test54(i64 %A) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], -32574
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = trunc i64 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = sext i16 %D to i32
+ ret i32 %E
+}
+
+define i64 @test55(i32 %A) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224
+; CHECK-NEXT: [[C:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[D:%.*]] = or i64 [[C]], -32574
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i32 %A to i16
+ %C = or i16 %B, -32574
+ %D = and i16 %C, -25350
+ %E = sext i16 %D to i64
+ ret i64 %E
+}
+
+define i64 @test56(i16 %A) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[P353:%.*]] = sext i16 [[A:%.*]] to i64
+; CHECK-NEXT: [[P354:%.*]] = lshr i64 [[P353]], 5
+; CHECK-NEXT: [[P355:%.*]] = and i64 [[P354]], 134217727
+; CHECK-NEXT: ret i64 [[P355]]
+;
+ %p353 = sext i16 %A to i32
+ %p354 = lshr i32 %p353, 5
+ %p355 = zext i32 %p354 to i64
+ ret i64 %p355
+}
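+; Note for test56: the mask 134217727 is 2^27 - 1; a 32-bit lshr by 5 can
+; produce at most 27 significant bits, so masking the widened i64 value
+; reproduces the zext of the narrow result.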
+
+define <2 x i64> @test56vec(<2 x i16> %A) {
+; CHECK-LABEL: @test56vec(
+; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], <i32 5, i32 5>
+; CHECK-NEXT: [[P355:%.*]] = zext <2 x i32> [[P354]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[P355]]
+;
+ %p353 = sext <2 x i16> %A to <2 x i32>
+ %p354 = lshr <2 x i32> %p353, <i32 5, i32 5>
+ %p355 = zext <2 x i32> %p354 to <2 x i64>
+ ret <2 x i64> %p355
+}
+
+define i64 @test57(i64 %A) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8
+; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 16777215
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %E = zext i32 %C to i64
+ ret i64 %E
+}
+
+define <2 x i64> @test57vec(<2 x i64> %A) {
+; CHECK-LABEL: @test57vec(
+; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 8, i32 8>
+; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[C]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[E]]
+;
+ %B = trunc <2 x i64> %A to <2 x i32>
+ %C = lshr <2 x i32> %B, <i32 8, i32 8>
+ %E = zext <2 x i32> %C to <2 x i64>
+ ret <2 x i64> %E
+}
+
+define i64 @test58(i64 %A) {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8
+; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 16777087
+; CHECK-NEXT: [[E:%.*]] = or i64 [[D]], 128
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %D = or i32 %C, 128
+ %E = zext i32 %D to i64
+ ret i64 %E
+
+}
+
+define i64 @test59(i8 %A, i8 %B) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[C:%.*]] = zext i8 [[A:%.*]] to i64
+; CHECK-NEXT: [[D:%.*]] = shl nuw nsw i64 [[C]], 4
+; CHECK-NEXT: [[E:%.*]] = and i64 [[D]], 48
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[B:%.*]], 4
+; CHECK-NEXT: [[G:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: [[H:%.*]] = or i64 [[E]], [[G]]
+; CHECK-NEXT: ret i64 [[H]]
+;
+ %C = zext i8 %A to i32
+ %D = shl i32 %C, 4
+ %E = and i32 %D, 48
+ %F = zext i8 %B to i32
+ %G = lshr i32 %F, 4
+ %H = or i32 %G, %E
+ %I = zext i32 %H to i64
+ ret i64 %I
+}
+
+define <3 x i32> @test60(<4 x i32> %call4) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i32> [[P10]]
+;
+ %p11 = bitcast <4 x i32> %call4 to i128
+ %p9 = trunc i128 %p11 to i96
+ %p10 = bitcast i96 %p9 to <3 x i32>
+ ret <3 x i32> %p10
+
+}
+
+define <4 x i32> @test61(<3 x i32> %call4) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[P10]]
+;
+ %p11 = bitcast <3 x i32> %call4 to i96
+ %p9 = zext i96 %p11 to i128
+ %p10 = bitcast i128 %p9 to <4 x i32>
+ ret <4 x i32> %p10
+}
+
+define <4 x i32> @test62(<3 x float> %call4) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
+; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[P10]]
+;
+ %p11 = bitcast <3 x float> %call4 to i96
+ %p9 = zext i96 %p11 to i128
+ %p10 = bitcast i128 %p9 to <4 x i32>
+ ret <4 x i32> %p10
+}
+
+; PR7311 - Don't create invalid IR on scalar->vector cast.
+define <2 x float> @test63(i64 %t8) {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[A:%.*]] = bitcast i64 [[T8:%.*]] to <2 x i32>
+; CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[A]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+;
+ %a = bitcast i64 %t8 to <2 x i32>
+ %vcvt.i = uitofp <2 x i32> %a to <2 x float>
+ ret <2 x float> %vcvt.i
+}
+
+define <4 x float> @test64(<4 x float> %c) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: ret <4 x float> [[C:%.*]]
+;
+ %t0 = bitcast <4 x float> %c to <4 x i32>
+ %t1 = bitcast <4 x i32> %t0 to <4 x float>
+ ret <4 x float> %t1
+}
+
+define <4 x float> @test65(<4 x float> %c) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret <4 x float> [[C:%.*]]
+;
+ %t0 = bitcast <4 x float> %c to <2 x double>
+ %t1 = bitcast <2 x double> %t0 to <4 x float>
+ ret <4 x float> %t1
+}
+
+define <2 x float> @test66(<2 x float> %c) {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: ret <2 x float> [[C:%.*]]
+;
+ %t0 = bitcast <2 x float> %c to double
+ %t1 = bitcast double %t0 to <2 x float>
+ ret <2 x float> %t1
+}
+
+define float @test2c() {
+; CHECK-LABEL: @test2c(
+; CHECK-NEXT: ret float -1.000000e+00
+;
+ ret float extractelement (<2 x float> bitcast (double bitcast (<2 x float> <float -1.000000e+00, float -1.000000e+00> to double) to <2 x float>), i32 0)
+}
+
+define i64 @test_mmx(<2 x i32> %x) {
+; CHECK-LABEL: @test_mmx(
+; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %A = bitcast <2 x i32> %x to x86_mmx
+ %B = bitcast x86_mmx %A to <2 x i32>
+ %C = bitcast <2 x i32> %B to i64
+ ret i64 %C
+}
+
+define i64 @test_mmx_const(<2 x i32> %c) {
+; CHECK-LABEL: @test_mmx_const(
+; CHECK-NEXT: ret i64 0
+;
+ %A = bitcast <2 x i32> zeroinitializer to x86_mmx
+ %B = bitcast x86_mmx %A to <2 x i32>
+ %C = bitcast <2 x i32> %B to i64
+ ret i64 %C
+}
+
+; PR12514
+define i1 @test67(i1 %a, i32 %b) {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: ret i1 false
+;
+ %t2 = zext i1 %a to i32
+ %conv6 = xor i32 %t2, 1
+ %and = and i32 %b, %conv6
+ %sext = shl nuw nsw i32 %and, 24
+ %neg.i = xor i32 %sext, -16777216
+ %conv.i.i = ashr exact i32 %neg.i, 24
+ %trunc = trunc i32 %conv.i.i to i8
+ %tobool.i = icmp eq i8 %trunc, 0
+ ret i1 %tobool.i
+}
+
+%s = type { i32, i32, i16 }
+
+define %s @test68(%s *%p, i64 %i) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul i64 %i, 12
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+; addrspacecasts should be eliminated.
+define %s @test68_addrspacecast(%s* %p, i64 %i) {
+; CHECK-LABEL: @test68_addrspacecast(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul i64 %i, 12
+ %q = addrspacecast %s* %p to i8 addrspace(2)*
+ %pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
+ %r = addrspacecast i8 addrspace(2)* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+define %s @test68_addrspacecast_2(%s* %p, i64 %i) {
+; CHECK-LABEL: @test68_addrspacecast_2(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[R:%.*]] = addrspacecast %s* [[PP1]] to [[S]] addrspace(1)*
+; CHECK-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[R]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul i64 %i, 12
+ %q = addrspacecast %s* %p to i8 addrspace(2)*
+ %pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
+ %r = addrspacecast i8 addrspace(2)* %pp to %s addrspace(1)*
+ %l = load %s, %s addrspace(1)* %r
+ ret %s %l
+}
+
+define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test68_as1(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], [[S]] addrspace(1)* [[P:%.*]], i32 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul i32 %i, 12
+ %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)*
+ %pp = getelementptr inbounds i8, i8 addrspace(1)* %q, i32 %o
+ %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
+ %l = load %s, %s addrspace(1)* %r
+ ret %s %l
+}
+
+define double @test69(double *%p, i64 %i) {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %o = shl nsw i64 %i, 3
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define %s @test70(%s *%p, i64 %i) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul nsw i64 %i, 36
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+define double @test71(double *%p, i64 %i) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: [[O:%.*]] = shl i64 [[I:%.*]], 2
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %o = shl i64 %i, 5
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define double @test72(double *%p, i32 %i) {
+; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[O:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %so = shl nsw i32 %i, 3
+ %o = sext i32 %so to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define double @test73(double *%p, i128 %i) {
+; CHECK-LABEL: @test73(
+; CHECK-NEXT: [[I_TR:%.*]] = trunc i128 [[I:%.*]] to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[I_TR]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %lo = shl nsw i128 %i, 3
+ %o = trunc i128 %lo to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define double @test74(double *%p, i64 %i) {
+; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %q = bitcast double* %p to i64*
+ %pp = getelementptr inbounds i64, i64* %q, i64 %i
+ %r = bitcast i64* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define i32* @test75(i32* %p, i32 %x) {
+; CHECK-LABEL: @test75(
+; CHECK-NEXT: [[Y:%.*]] = shl i32 [[X:%.*]], 3
+; CHECK-NEXT: [[Z:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: [[Q:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT: [[R:%.*]] = getelementptr i8, i8* [[Q]], i64 [[Z]]
+; CHECK-NEXT: [[S:%.*]] = bitcast i8* [[R]] to i32*
+; CHECK-NEXT: ret i32* [[S]]
+;
+ %y = shl i32 %x, 3
+ %z = sext i32 %y to i64
+ %q = bitcast i32* %p to i8*
+ %r = getelementptr i8, i8* %q, i64 %z
+ %s = bitcast i8* %r to i32*
+ ret i32* %s
+}
+
+define %s @test76(%s *%p, i64 %i, i64 %j) {
+; CHECK-LABEL: @test76(
+; CHECK-NEXT: [[O2:%.*]] = mul i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[O2]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul i64 %i, 12
+ %o2 = mul nsw i64 %o, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o2
+ %r = bitcast i8* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+define %s @test77(%s *%p, i64 %i, i64 %j) {
+; CHECK-LABEL: @test77(
+; CHECK-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3
+; CHECK-NEXT: [[O2:%.*]] = mul nsw i64 [[O]], [[J:%.*]]
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O2]]
+; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %o = mul nsw i64 %i, 36
+ %o2 = mul nsw i64 %o, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %o2
+ %r = bitcast i8* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
+; CHECK-LABEL: @test78(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i32 [[K:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = mul nsw i32 [[A]], [[L:%.*]]
+; CHECK-NEXT: [[C:%.*]] = sext i32 [[B]] to i128
+; CHECK-NEXT: [[D:%.*]] = mul nsw i128 [[C]], [[M:%.*]]
+; CHECK-NEXT: [[E:%.*]] = mul i128 [[D]], [[N:%.*]]
+; CHECK-NEXT: [[F:%.*]] = trunc i128 [[E]] to i64
+; CHECK-NEXT: [[G:%.*]] = mul i64 [[F]], [[I:%.*]]
+; CHECK-NEXT: [[H:%.*]] = mul i64 [[G]], [[J:%.*]]
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[H]]
+; CHECK-NEXT: [[LOAD:%.*]] = load [[S]], %s* [[PP1]], align 4
+; CHECK-NEXT: ret [[S]] %load
+;
+ %a = mul nsw i32 %k, 36
+ %b = mul nsw i32 %a, %l
+ %c = sext i32 %b to i128
+ %d = mul nsw i128 %c, %m
+ %e = mul i128 %d, %n
+ %f = trunc i128 %e to i64
+ %g = mul nsw i64 %f, %i
+ %h = mul nsw i64 %g, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i64 %h
+ %r = bitcast i8* %pp to %s*
+ %load = load %s, %s* %r
+ ret %s %load
+}
+
+define %s @test79(%s *%p, i64 %i, i32 %j) {
+; CHECK-LABEL: @test79(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = mul i32 [[TMP1]], 36
+; CHECK-NEXT: [[C:%.*]] = mul i32 [[B]], [[J:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = bitcast %s* [[P:%.*]] to i8*
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP2]]
+; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to %s*
+; CHECK-NEXT: [[L:%.*]] = load [[S:%.*]], %s* [[R]], align 4
+; CHECK-NEXT: ret [[S]] %l
+;
+ %a = mul nsw i64 %i, 36
+ %b = trunc i64 %a to i32
+ %c = mul i32 %b, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8, i8* %q, i32 %c
+ %r = bitcast i8* %pp to %s*
+ %l = load %s, %s* %r
+ ret %s %l
+}
+
+define double @test80([100 x double]* %p, i32 %i) {
+; CHECK-LABEL: @test80(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double]* [[P:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %t = shl nsw i32 %i, 3
+ %q = bitcast [100 x double]* %p to i8*
+ %pp = getelementptr i8, i8* %q, i32 %t
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test80_addrspacecast(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]]
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %t = shl nsw i32 %i, 3
+ %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
+ %pp = getelementptr i8, i8 addrspace(2)* %q, i32 %t
+ %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
+ %l = load double, double addrspace(1)* %r
+ ret double %l
+}
+
+define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
+; CHECK-LABEL: @test80_addrspacecast_2(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]]
+; CHECK-NEXT: [[R:%.*]] = addrspacecast double addrspace(1)* [[PP1]] to double addrspace(3)*
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(3)* [[R]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %t = shl nsw i32 %i, 3
+ %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
+ %pp = getelementptr i8, i8 addrspace(2)* %q, i32 %t
+ %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
+ %l = load double, double addrspace(3)* %r
+ ret double %l
+}
+
+define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
+; CHECK-LABEL: @test80_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[I:%.*]] to i32
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %t = shl nsw i16 %i, 3
+ %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
+ %pp = getelementptr i8, i8 addrspace(1)* %q, i16 %t
+ %r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
+ %l = load double, double addrspace(1)* %r
+ ret double %l
+}
+
+define double @test81(double *%p, float %f) {
+; CHECK-LABEL: @test81(
+; CHECK-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i64
+; CHECK-NEXT: [[Q:%.*]] = bitcast double* [[P:%.*]] to i8*
+; CHECK-NEXT: [[PP:%.*]] = getelementptr i8, i8* [[Q]], i64 [[I]]
+; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to double*
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[R]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
+ %i = fptosi float %f to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr i8, i8* %q, i64 %i
+ %r = bitcast i8* %pp to double*
+ %l = load double, double* %r
+ ret double %l
+}
+
+define i64 @test82(i64 %A) {
+; CHECK-LABEL: @test82(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[A:%.*]], 1
+; CHECK-NEXT: [[E:%.*]] = and i64 [[TMP1]], 4294966784
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %D = shl i32 %C, 9
+ %E = zext i32 %D to i64
+ ret i64 %E
+}
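+; Note for test82: lshr by 8 followed by shl by 9 is a net shift left by one
+; with bits 0..8 cleared, so the widened form is a shl by 1 masked with
+; 4294966784 (0xFFFFFE00), which keeps exactly bits 9..31.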
+
+; PR15959
+define i64 @test83(i16 %a, i64 %k) {
+; CHECK-LABEL: @test83(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[K:%.*]] to i32
+; CHECK-NEXT: [[SH_PROM:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]]
+; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64
+; CHECK-NEXT: ret i64 [[SH_PROM1]]
+;
+ %conv = sext i16 %a to i32
+ %sub = add nsw i64 %k, -1
+ %sh_prom = trunc i64 %sub to i32
+ %shl = shl i32 %conv, %sh_prom
+ %sh_prom1 = zext i32 %shl to i64
+ ret i64 %sh_prom1
+}
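+; Note for test83: the add of -1 only feeds a 32-bit shift amount, so it can
+; be performed on the truncated value in 32 bits instead of in 64 bits.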
+
+define i8 @test84(i32 %a) {
+; CHECK-LABEL: @test84(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
+ %add = add nsw i32 %a, -16777216
+ %shr = lshr exact i32 %add, 23
+ %trunc = trunc i32 %shr to i8
+ ret i8 %trunc
+}
+
+define i8 @test85(i32 %a) {
+; CHECK-LABEL: @test85(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
+ %add = add nuw i32 %a, -16777216
+ %shr = lshr exact i32 %add, 23
+ %trunc = trunc i32 %shr to i8
+ ret i8 %trunc
+}
+
+define i16 @test86(i16 %v) {
+; CHECK-LABEL: @test86(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 4
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 4
+ %t = trunc i32 %s to i16
+ ret i16 %t
+}
+
+define i16 @test87(i16 %v) {
+; CHECK-LABEL: @test87(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 12
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %c = sext i16 %v to i32
+ %m = mul nsw i32 %c, 16
+ %a = ashr i32 %m, 16
+ %t = trunc i32 %a to i16
+ ret i16 %t
+}
+
+define i16 @test88(i16 %v) {
+; CHECK-LABEL: @test88(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 15
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 18
+ %t = trunc i32 %s to i16
+ ret i16 %t
+}
+
+define i32 @PR21388(i32* %v) {
+; CHECK-LABEL: @PR21388(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32* [[V:%.*]], null
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
+ %icmp = icmp slt i32* %v, null
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+define float @sitofp_zext(i16 %a) {
+; CHECK-LABEL: @sitofp_zext(
+; CHECK-NEXT: [[SITOFP:%.*]] = uitofp i16 [[A:%.*]] to float
+; CHECK-NEXT: ret float [[SITOFP]]
+;
+ %zext = zext i16 %a to i32
+ %sitofp = sitofp i32 %zext to float
+ ret float %sitofp
+}
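+; Note: the zext guarantees a non-negative value, so the signed conversion is
+; equivalent to an unsigned conversion of the original i16.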
+
+define i1 @PR23309(i32 %A, i32 %B) {
+; CHECK-LABEL: @PR23309(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %add = add i32 %A, -4
+ %sub = sub nsw i32 %add, %B
+ %trunc = trunc i32 %sub to i1
+ ret i1 %trunc
+}
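+; Note: truncation to i1 keeps only bit 0, and adding the even constant -4
+; cannot change bit 0, so the add is dropped and the trunc becomes a
+; mask-and-compare (here and in PR23309v2 below).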
+
+define i1 @PR23309v2(i32 %A, i32 %B) {
+; CHECK-LABEL: @PR23309v2(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
+ %add = add i32 %A, -4
+ %sub = add nuw i32 %add, %B
+ %trunc = trunc i32 %sub to i1
+ ret i1 %trunc
+}
+
+define i16 @PR24763(i8 %V) {
+; CHECK-LABEL: @PR24763(
+; CHECK-NEXT: [[L:%.*]] = ashr i8 [[V:%.*]], 1
+; CHECK-NEXT: [[T:%.*]] = sext i8 [[L]] to i16
+; CHECK-NEXT: ret i16 [[T]]
+;
+ %conv = sext i8 %V to i32
+ %l = lshr i32 %conv, 1
+ %t = trunc i32 %l to i16
+ ret i16 %t
+}
+
+define i64 @PR28745() {
+; CHECK-LABEL: @PR28745(
+; CHECK-NEXT: ret i64 1
+;
+ %b = zext i32 extractvalue ({ i32 } select (i1 icmp eq (i16 extractelement (<2 x i16> bitcast (<1 x i32> <i32 1> to <2 x i16>), i32 0), i16 0), { i32 } { i32 1 }, { i32 } zeroinitializer), 0) to i64
+ ret i64 %b
+}
+
+define i32 @test89() {
+; CHECK-LABEL: @test89(
+; CHECK-NEXT: ret i32 393216
+;
+ ret i32 bitcast (<2 x i16> <i16 6, i16 undef> to i32)
+}
+
+define <2 x i32> @test90() {
+; CHECK-LABEL: @test90(
+; CHECK-NEXT: ret <2 x i32> <i32 0, i32 15360>
+;
+ %t6 = bitcast <4 x half> <half undef, half undef, half undef, half 0xH3C00> to <2 x i32>
+ ret <2 x i32> %t6
+}
+
+; Do not optimize to ashr i64 (shift by 48 > 96 - 64)
+define i64 @test91(i64 %A) {
+; CHECK-LABEL: @test91(
+; CHECK-NEXT: [[B:%.*]] = sext i64 [[A:%.*]] to i96
+; CHECK-NEXT: [[C:%.*]] = lshr i96 [[B]], 48
+; CHECK-NEXT: [[D:%.*]] = trunc i96 [[C]] to i64
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = sext i64 %A to i96
+ %C = lshr i96 %B, 48
+ %D = trunc i96 %C to i64
+ ret i64 %D
+}
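+; Note for test91: with a shift of 48 (> 96 - 64), the lshr shifts 16 zero
+; bits into the low 64 bits of the result, so the top 16 bits of %D are
+; always zero, whereas ashr i64 %A, 48 would fill them with the sign bit.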
+
+; Do optimize to ashr i64 (shift by 32 <= 96 - 64)
+define i64 @test92(i64 %A) {
+; CHECK-LABEL: @test92(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[A:%.*]], 32
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %B = sext i64 %A to i96
+ %C = lshr i96 %B, 32
+ %D = trunc i96 %C to i64
+ ret i64 %D
+}
+
+; When optimizing to ashr i32, don't shift by more than 31.
+define i32 @test93(i32 %A) {
+; CHECK-LABEL: @test93(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %B = sext i32 %A to i96
+ %C = lshr i96 %B, 64
+ %D = trunc i96 %C to i32
+ ret i32 %D
+}
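+; Note for test93: after the 64-bit shift, the low 32 bits are all copies of
+; the sign bit of %A, which is exactly what ashr i32 %A, 31 produces; a shift
+; amount of 64 would be out of range for i32, so it is clamped to 31.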
+
+; The following four tests exercise sext + lshr + trunc patterns.
+; PR33078
+
+define i8 @pr33078_1(i8 %A) {
+; CHECK-LABEL: @pr33078_1(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 [[A:%.*]], 7
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %B = sext i8 %A to i16
+ %C = lshr i16 %B, 8
+ %D = trunc i16 %C to i8
+ ret i8 %D
+}
+
+define i12 @pr33078_2(i8 %A) {
+; CHECK-LABEL: @pr33078_2(
+; CHECK-NEXT: [[C:%.*]] = ashr i8 [[A:%.*]], 4
+; CHECK-NEXT: [[D:%.*]] = sext i8 [[C]] to i12
+; CHECK-NEXT: ret i12 [[D]]
+;
+ %B = sext i8 %A to i16
+ %C = lshr i16 %B, 4
+ %D = trunc i16 %C to i12
+ ret i12 %D
+}
+
+define i4 @pr33078_3(i8 %A) {
+; CHECK-LABEL: @pr33078_3(
+; CHECK-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i16
+; CHECK-NEXT: [[C:%.*]] = lshr i16 [[B]], 12
+; CHECK-NEXT: [[D:%.*]] = trunc i16 [[C]] to i4
+; CHECK-NEXT: ret i4 [[D]]
+;
+ %B = sext i8 %A to i16
+ %C = lshr i16 %B, 12
+ %D = trunc i16 %C to i4
+ ret i4 %D
+}
+
+define i8 @pr33078_4(i3 %x) {
+; Don't turn this into an `ashr`. This was getting miscompiled.
+; CHECK-LABEL: @pr33078_4(
+; CHECK-NEXT: [[B:%.*]] = sext i3 [[X:%.*]] to i16
+; CHECK-NEXT: [[C:%.*]] = lshr i16 [[B]], 13
+; CHECK-NEXT: [[D:%.*]] = trunc i16 [[C]] to i8
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %B = sext i3 %x to i16
+ %C = lshr i16 %B, 13
+ %D = trunc i16 %C to i8
+ ret i8 %D
+}
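+; Note for pr33078_4: with a shift of 13, %D is either 0 or 7 (three sign-bit
+; copies with zeros above), a value that an ashr-based rewrite of the i3
+; input cannot produce.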
+
+; (sext (xor (cmp), -1)) -> (sext (!cmp))
+define i64 @test94(i32 %a) {
+; CHECK-LABEL: @test94(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp eq i32 %a, -2
+ %2 = sext i1 %1 to i8
+ %3 = xor i8 %2, -1
+ %4 = sext i8 %3 to i64
+ ret i64 %4
+}
+
+; We should be able to remove the zext and trunc here.
+define i32 @test95(i32 %x) {
+; CHECK-LABEL: @test95(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = trunc i32 %x to i8
+ %2 = lshr i8 %1, 6
+ %3 = and i8 %2, 2
+ %4 = or i8 %3, 40
+ %5 = zext i8 %4 to i32
+ ret i32 %5
+}
diff --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll
new file mode 100644
index 00000000000..141ad186002
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast_phi.ll
@@ -0,0 +1,135 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
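+; The i32 phis below only carry bitcast float values; the expectation is that
+; instcombine rewrites them as float phis so that all of the bitcasts
+; disappear (hence the CHECK-NOT on bitcast below).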
+define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
+; CHECK-NOT: bitcast
+
+ %callA = alloca [258 x float], align 4
+ %callB = alloca [258 x float], align 4
+ %conv.i = uitofp i32 %iNumSteps to float
+ %1 = bitcast float %conv.i to i32
+ %conv.i12 = zext i32 %tid to i64
+ %arrayidx3 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i12
+ %2 = bitcast float* %arrayidx3 to i32*
+ store i32 %1, i32* %2, align 4
+ %arrayidx6 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i12
+ %3 = bitcast float* %arrayidx6 to i32*
+ store i32 %1, i32* %3, align 4
+ %cmp7 = icmp eq i32 %tid, 0
+ br i1 %cmp7, label %.bb1, label %.bb2
+
+.bb1:
+ %arrayidx10 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 256
+ store float %conv.i, float* %arrayidx10, align 4
+ %arrayidx11 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 256
+ store float 0.000000e+00, float* %arrayidx11, align 4
+ br label %.bb2
+
+.bb2:
+ %cmp135 = icmp sgt i32 %iNumSteps, 0
+ br i1 %cmp135, label %.bb3, label %.bb8
+
+; CHECK-LABEL: .bb3
+; CHECK: phi float
+; CHECK: phi float
+; CHECK: phi i32 {{.*}} [ %iNumSteps
+; CHECK-NOT: rA.sroa.[0-9].[0-9] = phi i32
+; CHECK-NOT: phi float
+; CHECK-NOT: phi i32
+; CHECK-LABEL: .bb4
+
+.bb3:
+ %rA.sroa.8.0 = phi i32 [ %rA.sroa.8.2, %.bb12 ], [ %1, %.bb2 ]
+ %rA.sroa.0.0 = phi i32 [ %rA.sroa.0.2, %.bb12 ], [ %1, %.bb2 ]
+ %i12.06 = phi i32 [ %sub, %.bb12 ], [ %iNumSteps, %.bb2 ]
+ %4 = icmp ugt i32 %i12.06, %base
+ %add = add i32 %i12.06, 1
+ %conv.i9 = sext i32 %add to i64
+ %arrayidx20 = getelementptr inbounds [258 x float], [258 x float]* %callA, i64 0, i64 %conv.i9
+ %5 = bitcast float* %arrayidx20 to i32*
+ %arrayidx24 = getelementptr inbounds [258 x float], [258 x float]* %callB, i64 0, i64 %conv.i9
+ %6 = bitcast float* %arrayidx24 to i32*
+ %cmp40 = icmp ult i32 %i12.06, %base
+ br i1 %4, label %.bb4, label %.bb5
+
+.bb4:
+ %7 = load i32, i32* %5, align 4
+ %8 = load i32, i32* %6, align 4
+ %9 = bitcast i32 %8 to float
+ %10 = bitcast i32 %7 to float
+ %add33 = fadd float %9, %10
+ %11 = bitcast i32 %rA.sroa.8.0 to float
+ %add33.1 = fadd float %add33, %11
+ %12 = bitcast float %add33.1 to i32
+ %13 = bitcast i32 %rA.sroa.0.0 to float
+ %add33.2 = fadd float %add33.1, %13
+ %14 = bitcast float %add33.2 to i32
+ br label %.bb5
+
+; CHECK-LABEL: .bb5
+; CHECK: phi float
+; CHECK: phi float
+; CHECK-NOT: rA.sroa.[0-9].[0-9] = phi i32
+; CHECK-NOT: phi float
+; CHECK-NOT: phi i32
+; CHECK-LABEL: .bb6
+
+.bb5:
+ %rA.sroa.8.1 = phi i32 [ %12, %.bb4 ], [ %rA.sroa.8.0, %.bb3 ]
+ %rA.sroa.0.1 = phi i32 [ %14, %.bb4 ], [ %rA.sroa.0.0, %.bb3 ]
+ br i1 %cmp40, label %.bb6, label %.bb7
+
+.bb6:
+ store i32 %rA.sroa.0.1, i32* %2, align 4
+ store i32 %rA.sroa.8.1, i32* %3, align 4
+ br label %.bb7
+
+.bb7:
+ br i1 %4, label %.bb9, label %.bb10
+
+.bb8:
+ ret void
+
+.bb9:
+ %15 = load i32, i32* %5, align 4
+ %16 = load i32, i32* %6, align 4
+ %17 = bitcast i32 %16 to float
+ %18 = bitcast i32 %15 to float
+ %add33.112 = fadd float %17, %18
+ %19 = bitcast i32 %rA.sroa.8.1 to float
+ %add33.1.1 = fadd float %add33.112, %19
+ %20 = bitcast float %add33.1.1 to i32
+ %21 = bitcast i32 %rA.sroa.0.1 to float
+ %add33.2.1 = fadd float %add33.1.1, %21
+ %22 = bitcast float %add33.2.1 to i32
+ br label %.bb10
+
+; CHECK-LABEL: .bb10
+; CHECK: phi float
+; CHECK: phi float
+; CHECK-NOT: rA.sroa.[0-9].[0-9] = phi i32
+; CHECK-NOT: phi float
+; CHECK-NOT: phi i32
+; CHECK-LABEL: .bb11
+
+.bb10:
+ %rA.sroa.8.2 = phi i32 [ %20, %.bb9 ], [ %rA.sroa.8.1, %.bb7 ]
+ %rA.sroa.0.2 = phi i32 [ %22, %.bb9 ], [ %rA.sroa.0.1, %.bb7 ]
+ br i1 %cmp40, label %.bb11, label %.bb12
+
+; CHECK-LABEL: .bb11
+; CHECK: store float
+; CHECK: store float
+; CHECK-NOT: store i32 %rA.sroa.[0-9].[0-9]
+; CHECK-LABEL: .bb12
+
+.bb11:
+ store i32 %rA.sroa.0.2, i32* %2, align 4
+ store i32 %rA.sroa.8.2, i32* %3, align 4
+ br label %.bb12
+
+.bb12:
+ %sub = add i32 %i12.06, -4
+ %cmp13 = icmp sgt i32 %sub, 0
+ br i1 %cmp13, label %.bb3, label %.bb8
+}
diff --git a/llvm/test/Transforms/InstCombine/cast_ptr.ll b/llvm/test/Transforms/InstCombine/cast_ptr.ll
new file mode 100644
index 00000000000..eaf946ef925
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cast_ptr.ll
@@ -0,0 +1,129 @@
+; Tests to make sure elimination of casts is working correctly
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "p:32:32-p1:32:32-p2:16:16"
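+
+; In this data layout, pointers in address spaces 0 and 1 are 32 bits wide
+; and pointers in address space 2 are 16 bits wide.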
+
+@global = global i8 0
+
+; This shouldn't convert to getelementptr because the relationship
+; between the arithmetic and the layout of allocated memory is
+; entirely unknown.
+; CHECK-LABEL: @test1(
+; CHECK: ptrtoint
+; CHECK: add
+; CHECK: inttoptr
+define i8* @test1(i8* %t) {
+ %tmpc = ptrtoint i8* %t to i32 ; <i32> [#uses=1]
+ %tmpa = add i32 %tmpc, 32 ; <i32> [#uses=1]
+ %tv = inttoptr i32 %tmpa to i8* ; <i8*> [#uses=1]
+ ret i8* %tv
+}
+
+; These casts should be folded away.
+; CHECK-LABEL: @test2(
+; CHECK: icmp eq i8* %a, %b
+define i1 @test2(i8* %a, i8* %b) {
+ %tmpa = ptrtoint i8* %a to i32 ; <i32> [#uses=1]
+ %tmpb = ptrtoint i8* %b to i32 ; <i32> [#uses=1]
+ %r = icmp eq i32 %tmpa, %tmpb ; <i1> [#uses=1]
+ ret i1 %r
+}
+
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_same_int(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_same_int(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+ %tmpa = ptrtoint i8 addrspace(2)* %a to i16
+ %tmpb = ptrtoint i8 addrspace(2)* %b to i16
+ %r = icmp eq i16 %tmpa, %tmpb
+ ret i1 %r
+}
+
+; These casts should be folded away.
+; CHECK-LABEL: @test2_as2_larger(
+; CHECK: icmp eq i8 addrspace(2)* %a, %b
+define i1 @test2_as2_larger(i8 addrspace(2)* %a, i8 addrspace(2)* %b) {
+ %tmpa = ptrtoint i8 addrspace(2)* %a to i32
+ %tmpb = ptrtoint i8 addrspace(2)* %b to i32
+ %r = icmp eq i32 %tmpa, %tmpb
+ ret i1 %r
+}
+
+; These casts should not be folded away.
+; CHECK-LABEL: @test2_diff_as
+; CHECK: icmp sge i32 %i0, %i1
+define i1 @test2_diff_as(i8* %p, i8 addrspace(1)* %q) {
+ %i0 = ptrtoint i8* %p to i32
+ %i1 = ptrtoint i8 addrspace(1)* %q to i32
+ %r0 = icmp sge i32 %i0, %i1
+ ret i1 %r0
+}
+
+; These casts should not be folded away.
+; CHECK-LABEL: @test2_diff_as_global
+; CHECK: icmp sge i32 %i1
+define i1 @test2_diff_as_global(i8 addrspace(1)* %q) {
+ %i0 = ptrtoint i8* @global to i32
+ %i1 = ptrtoint i8 addrspace(1)* %q to i32
+ %r0 = icmp sge i32 %i1, %i0
+ ret i1 %r0
+}
+
+; These casts should also be folded away.
+; CHECK-LABEL: @test3(
+; CHECK: icmp eq i8* %a, @global
+define i1 @test3(i8* %a) {
+ %tmpa = ptrtoint i8* %a to i32
+ %r = icmp eq i32 %tmpa, ptrtoint (i8* @global to i32)
+ ret i1 %r
+}
+
+define i1 @test4(i32 %A) {
+ %B = inttoptr i32 %A to i8*
+ %C = icmp eq i8* %B, null
+ ret i1 %C
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: %C = icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1 %C
+}
+
+define i1 @test4_as2(i16 %A) {
+; CHECK-LABEL: @test4_as2(
+; CHECK-NEXT: %C = icmp eq i16 %A, 0
+; CHECK-NEXT: ret i1 %C
+ %B = inttoptr i16 %A to i8 addrspace(2)*
+ %C = icmp eq i8 addrspace(2)* %B, null
+ ret i1 %C
+}
+
+
+; Pulling the cast out of the load allows us to eliminate the load, and then
+; the whole array.
+
+ %op = type { float }
+ %unop = type { i32 }
+@Array = internal constant [1 x %op* (%op*)*] [ %op* (%op*)* @foo ] ; <[1 x %op* (%op*)*]*> [#uses=1]
+
+declare %op* @foo(%op* %X)
+
+define %unop* @test5(%op* %O) {
+ %tmp = load %unop* (%op*)*, %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
+ %tmp.2 = call %unop* %tmp( %op* %O ) ; <%unop*> [#uses=1]
+ ret %unop* %tmp.2
+; CHECK-LABEL: @test5(
+; CHECK: call %op* @foo(%op* %O)
+}
+
+
+
+; InstCombine can not fold 'load (cast P)' -> 'cast (load P)' if the cast changes
+; the address space.
+
+define i8 @test6(i8 addrspace(1)* %source) {
+entry:
+ %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8*
+ %tmp4 = load i8, i8* %arrayidx223
+ ret i8 %tmp4
+; CHECK-LABEL: @test6(
+; CHECK: load i8, i8* %arrayidx223
+}
diff --git a/llvm/test/Transforms/InstCombine/ceil.ll b/llvm/test/Transforms/InstCombine/ceil.ll
new file mode 100644
index 00000000000..9f965a3c34b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ceil.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.ceil.f32(float) #0
+declare double @llvm.ceil.f64(double) #0
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #0
+
+; CHECK-LABEL: @constant_fold_ceil_f32_01
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_ceil_f32_01() #0 {
+ %x = call float @llvm.ceil.f32(float 1.00) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f32_02
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_ceil_f32_02() #0 {
+ %x = call float @llvm.ceil.f32(float 1.25) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f32_03
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_ceil_f32_03() #0 {
+ %x = call float @llvm.ceil.f32(float -1.25) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_v4f32_01
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float -1.000000e+00, float -1.000000e+00>
+define <4 x float> @constant_fold_ceil_v4f32_01() #0 {
+ %x = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float 1.00, float 1.25, float -1.25, float -1.00>)
+ ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_01
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_ceil_f64_01() #0 {
+ %x = call double @llvm.ceil.f64(double 1.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_02
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_ceil_f64_02() #0 {
+ %x = call double @llvm.ceil.f64(double 1.3) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_ceil_f64_03
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_ceil_f64_03() #0 {
+ %x = call double @llvm.ceil.f64(double -1.75) #0
+ ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
new file mode 100644
index 00000000000..49b5160ba24
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -0,0 +1,607 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
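+; These tests exercise two-select clamp patterns that constrain %x to the
+; range [1.0, 255.0], plus negative cases where the pattern is not a valid
+; clamp.
+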
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_ordered_strict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast olt float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_ordered_nonstrict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ole float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_ordered_strict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ogt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast ogt float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_ordered_nonstrict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ogt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast oge float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+
+; The same for unordered
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_unordered_strict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_strict_maxmin(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ult float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ult float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_fast_unordered_nonstrict_maxmin(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_maxmin(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ult float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp fast ule float %x, 1.0
+ %r = select i1 %cmp1, float 1.0, float %min
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_unordered_strict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_strict_minmax(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ugt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast ugt float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_fast_unordered_nonstrict_minmax(float %x) {
+;
+; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_minmax(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT: ret float [[R1]]
+;
+ %cmp2 = fcmp fast ugt float %x, 1.0
+ %max = select i1 %cmp2, float %x, float 1.0
+ %cmp1 = fcmp fast uge float %x, 255.0
+ %r = select i1 %cmp1, float 255.0, float %max
+ ret float %r
+}
+
+; Some more checks with fast
+
+; (X > 1.0) ? min(x, 255.0) : 1.0
+; This did not match previously because the select operands were in inverse order.
+define float @clamp_test_1(float %x) {
+; CHECK-LABEL: @clamp_test_1(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[INNER_SEL]], 1.000000e+00
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[INNER_SEL]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R1]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ugt float %x, 1.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 1.0
+ ret float %r
+}
+
+; And some negative tests
+
+; Like @clamp_test_1 but HighConst < LowConst
+define float @clamp_negative_wrong_const(float %x) {
+; CHECK-LABEL: @clamp_negative_wrong_const(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 5.120000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 5.120000e+02
+; CHECK-NEXT: ret float [[R]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ugt float %x, 512.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 512.0
+ ret float %r
+}
+
+; Like @clamp_test_1 but both are min
+define float @clamp_negative_same_op(float %x) {
+; CHECK-LABEL: @clamp_negative_same_op(
+; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %inner_cmp = fcmp fast ult float %x, 255.0
+ %inner_sel = select i1 %inner_cmp, float %x, float 255.0
+ %outer_cmp = fcmp fast ult float %x, 1.0
+ %r = select i1 %outer_cmp, float %inner_sel, float 1.0
+ ret float %r
+}
+
+
+; And now without fast.
+
+; First, check that we don't do bad things in the presence of signed zeros
+define float @clamp_float_with_zero1(float %x) {
+; CHECK-LABEL: @clamp_float_with_zero1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 0.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp ole float %x, 0.0
+ %r = select i1 %cmp1, float 0.0, float %min
+ ret float %r
+}
+
+define float @clamp_float_with_zero2(float %x) {
+; CHECK-LABEL: @clamp_float_with_zero2(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 0.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 0.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp fast olt float %x, 255.0
+ %min = select i1 %cmp2, float %x, float 255.0
+ %cmp1 = fcmp olt float %x, 0.0
+ %r = select i1 %cmp1, float 0.0, float %min
+ ret float %r
+}
+
+; Also, here we care more about the ordering of the inner min/max, so there
+; are twice as many cases.
+; TODO: that is not implemented yet, so these checks are for the future.
+; This means the checks below can simply verify that "fcmp.*%x" appears
+; twice for each label.
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_ordered_strict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; X is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp olt float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (255.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_strict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; X is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp olt float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (NaN)
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_ordered_nonstrict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; X is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ole float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (255.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_nonstrict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ole float %x, 1.0 ; false
+ %r = select i1 %cmp1, float 1.0, float %min ; min (NaN)
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_ordered_strict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp ogt float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (1.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_strict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_strict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp ogt float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (NaN)
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_ordered_nonstrict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp oge float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (1.0)
+ ret float %r
+}
+
+define float @clamp_float_ordered_nonstrict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_ordered_nonstrict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp oge float %x, 255.0 ; false
+ %r = select i1 %cmp1, float 255.0, float %max ; max (NaN)
+ ret float %r
+}
+
+
+; The same tests, but with unordered comparisons in the outer fcmp
+
+; (X < C1) ? C1 : MIN(X, C2)
+define float @clamp_float_unordered_strict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; x is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ult float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_strict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ult float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+; (X <= C1) ? C1 : MIN(X, C2)
+define float @clamp_float_unordered_nonstrict_maxmin1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp olt float %x, 255.0 ; x is NaN => false
+ %min = select i1 %cmp2, float %x, float 255.0 ; 255.0
+ %cmp1 = fcmp ule float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_nonstrict_maxmin2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_maxmin2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp oge float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ult float %x, 255.0 ; x is NaN => true
+ %min = select i1 %cmp2, float %x, float 255.0 ; NaN
+ %cmp1 = fcmp ule float %x, 1.0 ; true
+ %r = select i1 %cmp1, float 1.0, float %min ; 1.0
+ ret float %r
+}
+
+; (X > C1) ? C1 : MAX(X, C2)
+define float @clamp_float_unordered_strict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp ugt float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_strict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_strict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp ugt float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+; (X >= C1) ? C1 : MAX(X, C2)
+define float @clamp_float_unordered_nonstrict_minmax1(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax1(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ogt float %x, 1.0 ; x is NaN => false
+ %max = select i1 %cmp2, float %x, float 1.0 ; 1.0
+ %cmp1 = fcmp uge float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+define float @clamp_float_unordered_nonstrict_minmax2(float %x) {
+;
+; CHECK-LABEL: @clamp_float_unordered_nonstrict_minmax2(
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp ole float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge float [[X]], 2.550000e+02
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp2 = fcmp ugt float %x, 1.0 ; x is NaN => true
+ %max = select i1 %cmp2, float %x, float 1.0 ; NaN
+ %cmp1 = fcmp uge float %x, 255.0 ; true
+ %r = select i1 %cmp1, float 255.0, float %max ; 255.0
+ ret float %r
+}
+
+;; Check behavior in the presence of casts
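+; The upper clamp can be performed in the integer domain: "(X u> 255) ?
+; 255.0 : uitofp(X)" is the same as "uitofp(umin(X, 255))" because 255
+; converts to float exactly, and the lower-bound test "X u< 1" simplifies
+; to "X == 0".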
+define float @ui32_clamp_and_cast_to_float(i32 %x) {
+; CHECK-LABEL: @ui32_clamp_and_cast_to_float(
+; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], 255
+; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP2:%.*]] = uitofp i32 [[MIN1]] to float
+; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[TMP2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %f_x = uitofp i32 %x to float
+ %up_cmp = icmp ugt i32 %x, 255
+ %lo_cmp = icmp ult i32 %x, 1
+ %min = select i1 %up_cmp, float 255.0, float %f_x
+ %r = select i1 %lo_cmp, float 1.0, float %min
+ ret float %r
+}
+
+define float @ui64_clamp_and_cast_to_float(i64 %x) {
+; CHECK-LABEL: @ui64_clamp_and_cast_to_float(
+; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i64 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[X]], 255
+; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 255
+; CHECK-NEXT: [[TMP2:%.*]] = uitofp i64 [[MIN1]] to float
+; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[TMP2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %f_x = uitofp i64 %x to float
+ %up_cmp = icmp ugt i64 %x, 255
+ %lo_cmp = icmp ult i64 %x, 1
+ %min = select i1 %up_cmp, float 255.0, float %f_x
+ %r = select i1 %lo_cmp, float 1.0, float %min
+ ret float %r
+}
+
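+; For @mixed_clamp_to_float_1 and @mixed_clamp_to_float_2 below, both the
+; min and the max can be performed on the i32 value, so only a single
+; sitofp of the fully clamped result should remain.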
+define float @mixed_clamp_to_float_1(i32 %x) {
+; CHECK-LABEL: @mixed_clamp_to_float_1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %si_min_cmp = icmp sgt i32 %x, 255
+ %si_min = select i1 %si_min_cmp, i32 255, i32 %x
+ %f_min = sitofp i32 %si_min to float
+ %f_x = sitofp i32 %x to float
+ %lo_cmp = fcmp ult float %f_x, 1.0
+ %r = select i1 %lo_cmp, float 1.0, float %f_min
+ ret float %r
+}
+
+define i32 @mixed_clamp_to_i32_1(float %x) {
+; CHECK-LABEL: @mixed_clamp_to_i32_1(
+; CHECK-NEXT: [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
+; CHECK-NEXT: [[I32_X:%.*]] = fptosi float [[X]] to i32
+; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[I32_X]], 0
+; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], i32 1, i32 [[I32_MIN]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %float_min_cmp = fcmp ogt float %x, 255.0
+ %float_min = select i1 %float_min_cmp, float 255.0, float %x
+ %i32_min = fptosi float %float_min to i32
+ %i32_x = fptosi float %x to i32
+ %lo_cmp = icmp ult i32 %i32_x, 1
+ %r = select i1 %lo_cmp, i32 1, i32 %i32_min
+ ret i32 %r
+}
+
+define float @mixed_clamp_to_float_2(i32 %x) {
+; CHECK-LABEL: @mixed_clamp_to_float_2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %si_min_cmp = icmp sgt i32 %x, 255
+ %si_min = select i1 %si_min_cmp, i32 255, i32 %x
+ %f_min = sitofp i32 %si_min to float
+ %lo_cmp = icmp slt i32 %x, 1
+ %r = select i1 %lo_cmp, float 1.0, float %f_min
+ ret float %r
+}
+
+define i32 @mixed_clamp_to_i32_2(float %x) {
+; CHECK-LABEL: @mixed_clamp_to_i32_2(
+; CHECK-NEXT: [[FLOAT_MIN_CMP:%.*]] = fcmp ogt float [[X:%.*]], 2.550000e+02
+; CHECK-NEXT: [[FLOAT_MIN:%.*]] = select i1 [[FLOAT_MIN_CMP]], float 2.550000e+02, float [[X]]
+; CHECK-NEXT: [[I32_MIN:%.*]] = fptosi float [[FLOAT_MIN]] to i32
+; CHECK-NEXT: [[LO_CMP:%.*]] = fcmp olt float [[X]], 1.000000e+00
+; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], i32 1, i32 [[I32_MIN]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %float_min_cmp = fcmp ogt float %x, 255.0
+ %float_min = select i1 %float_min_cmp, float 255.0, float %x
+ %i32_min = fptosi float %float_min to i32
+ %lo_cmp = fcmp olt float %x, 1.0
+ %r = select i1 %lo_cmp, i32 1, i32 %i32_min
+ ret i32 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll
new file mode 100644
index 00000000000..82b32ee1b7f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll
@@ -0,0 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+declare i33 @llvm.cttz.i33(i33, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i11 @llvm.ctpop.i11(i11)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+
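+; bswap is an involution, so "icmp eq/ne (bswap X), C" folds to
+; "icmp eq/ne X, bswap(C)": bswap i16 1 = 0x0100 = 256, bswap i32 2 =
+; 0x02000000 = 33554432, and bswap i64 3 = 0x0300000000000000 =
+; 216172782113783808.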
+define i1 @bswap_eq_i16(i16 %x) {
+; CHECK-LABEL: @bswap_eq_i16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[X:%.*]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bs = call i16 @llvm.bswap.i16(i16 %x)
+ %cmp = icmp eq i16 %bs, 1
+ ret i1 %cmp
+}
+
+define i1 @bswap_ne_i32(i32 %x) {
+; CHECK-LABEL: @bswap_ne_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 33554432
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bs = tail call i32 @llvm.bswap.i32(i32 %x)
+ %cmp = icmp ne i32 %bs, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) {
+; CHECK-LABEL: @bswap_eq_v2i64(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 216172782113783808, i64 216172782113783808>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x)
+ %cmp = icmp eq <2 x i64> %bs, <i64 3, i64 3>
+ ret <2 x i1> %cmp
+}
+
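+; ctlz(X) equals the bit width only for X == 0, and equals 0 only when the
+; sign bit of X is set; for other constants k, "ctlz(X) == k" means bit
+; (bitwidth - 1 - k) is the highest set bit, which is checked with a mask.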
+define i1 @ctlz_eq_bitwidth_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_eq_bitwidth_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp eq i32 %lz, 32
+ ret i1 %cmp
+}
+
+define i1 @ctlz_eq_zero_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_eq_zero_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp eq i32 %lz, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_ne_zero_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_eq_bw_minus_1_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp eq i32 %lz, 31
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define i1 @ctlz_eq_other_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_eq_other_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -128
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 128
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp eq i32 %lz, 24
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_ne_other_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -128, i32 -128>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 128, i32 128>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, <i32 24, i32 24>
+ ret <2 x i1> %cmp
+}
+
+define i1 @ctlz_eq_other_i32_multiuse(i32 %x, i32* %p) {
+; CHECK-LABEL: @ctlz_eq_other_i32_multiuse(
+; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 24
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ store i32 %lz, i32* %p
+ %cmp = icmp eq i32 %lz, 24
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctlz_ne_bitwidth_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_ne_bitwidth_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
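+; "ctlz(X) u> k" means X has more than k leading zeros, i.e. X u< 2^(31-k):
+; k = 0 becomes X s> -1, k = 16 becomes X u< 32768, and k = 31 becomes X == 0.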
+define i1 @ctlz_ugt_zero_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_ugt_zero_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ugt i32 %lz, 0
+ ret i1 %cmp
+}
+
+define i1 @ctlz_ugt_one_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_ugt_one_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ugt i32 %lz, 1
+ ret i1 %cmp
+}
+
+define i1 @ctlz_ugt_other_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_ugt_other_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32768
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ugt i32 %lz, 16
+ ret i1 %cmp
+}
+
+define i1 @ctlz_ugt_other_multiuse_i32(i32 %x, i32* %p) {
+; CHECK-LABEL: @ctlz_ugt_other_multiuse_i32(
+; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 32768
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ store i32 %lz, i32* %p
+ %cmp = icmp ugt i32 %lz, 16
+ ret i1 %cmp
+}
+
+define i1 @ctlz_ugt_bw_minus_one_i32(i32 %x) {
+; CHECK-LABEL: @ctlz_ugt_bw_minus_one_i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ugt i32 %lz, 31
+ ret i1 %cmp
+}
+
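+; Conversely, "ctlz(X) u< k" means X u>= 2^(32-k): k = 1 becomes X s< 0,
+; k = 16 becomes X u> 65535, and k = 32 becomes X != 0.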
+define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctlz_ult_one_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %lz, <i32 1, i32 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctlz_ult_other_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %lz, <i32 16, i32 16>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
+; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32(
+; CHECK-NEXT: [[LZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: store <2 x i32> [[LZ]], <2 x i32>* [[P:%.*]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ store <2 x i32> %lz, <2 x i32>* %p
+ %cmp = icmp ult <2 x i32> %lz, <i32 16, i32 16>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %lz, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ctlz_ult_bitwidth_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctlz_ult_bitwidth_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %lz, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
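+; cttz mirrors ctlz with trailing bits: the count equals the bit width only
+; for zero, "cttz(X) u> k" means the low k+1 bits are all zero (checked with
+; a mask), and "cttz(X) u< k" means one of the low k bits is set.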
+define i1 @cttz_ne_bitwidth_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ne_bitwidth_i33(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i33 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp ne i33 %tz, 33
+ ret i1 %cmp
+}
+
+define <2 x i1> @cttz_eq_bitwidth_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_eq_bitwidth_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp eq <2 x i32> %x, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
+define i1 @cttz_eq_zero_i33(i33 %x) {
+; CHECK-LABEL: @cttz_eq_zero_i33(
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i33 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp eq i33 %tz, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_ne_zero_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @cttz_eq_bw_minus_1_i33(i33 %x) {
+; CHECK-LABEL: @cttz_eq_bw_minus_1_i33(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], -4294967296
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp eq i33 %tz, 32
+ ret i1 %cmp
+}
+
+define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define i1 @cttz_eq_other_i33(i33 %x) {
+; CHECK-LABEL: @cttz_eq_other_i33(
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 31
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp eq i33 %tz, 4
+ ret i1 %cmp
+}
+
+define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_ne_other_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+ %cmp = icmp ne <2 x i32> %x, <i32 4, i32 4>
+ ret <2 x i1> %cmp
+}
+
+define i1 @cttz_eq_other_i33_multiuse(i33 %x, i33* %p) {
+; CHECK-LABEL: @cttz_eq_other_i33_multiuse(
+; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: store i33 [[TZ]], i33* [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ store i33 %tz, i33* %p
+ %cmp = icmp eq i33 %tz, 4
+ ret i1 %cmp
+}
+
+define i1 @cttz_ugt_zero_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ugt_zero_i33(
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp ugt i33 %tz, 0
+ ret i1 %cmp
+}
+
+define i1 @cttz_ugt_one_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ugt_one_i33(
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp ugt i33 %tz, 1
+ ret i1 %cmp
+}
+
+define i1 @cttz_ugt_other_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ugt_other_i33(
+; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 131071
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp ugt i33 %tz, 16
+ ret i1 %cmp
+}
+
+define i1 @cttz_ugt_other_multiuse_i33(i33 %x, i33* %p) {
+; CHECK-LABEL: @cttz_ugt_other_multiuse_i33(
+; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: store i33 [[TZ]], i33* [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i33 [[TZ]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ store i33 %tz, i33* %p
+ %cmp = icmp ugt i33 %tz, 16
+ ret i1 %cmp
+}
+
+define i1 @cttz_ugt_bw_minus_one_i33(i33 %x) {
+; CHECK-LABEL: @cttz_ugt_bw_minus_one_i33(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false)
+ %cmp = icmp ugt i33 %tz, 32
+ ret i1 %cmp
+}
+
+define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @cttz_ult_one_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %tz, <i32 1, i32 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @cttz_ult_other_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %tz, <i32 16, i32 16>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, <2 x i32>* %p) {
+; CHECK-LABEL: @cttz_ult_other_multiuse_v2i32(
+; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: store <2 x i32> [[TZ]], <2 x i32>* [[P:%.*]], align 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ store <2 x i32> %tz, <2 x i32>* %p
+ %cmp = icmp ult <2 x i32> %tz, <i32 16, i32 16>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %tz, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @cttz_ult_bitwidth_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @cttz_ult_bitwidth_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ult <2 x i32> %tz, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
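+; ctpop(X) is zero only for X == 0 and equals the bit width only when every
+; bit is set, i.e. X == -1.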
+define i1 @ctpop_eq_zero_i11(i11 %x) {
+; CHECK-LABEL: @ctpop_eq_zero_i11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i11 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %pop = tail call i11 @llvm.ctpop.i11(i11 %x)
+ %cmp = icmp eq i11 %pop, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctpop_ne_zero_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctpop_ne_zero_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
+ %cmp = icmp ne <2 x i32> %pop, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @ctpop_eq_bitwidth_i8(i8 %x) {
+; CHECK-LABEL: @ctpop_eq_bitwidth_i8(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
+ %cmp = icmp eq i8 %pop, 8
+ ret i1 %cmp
+}
+
+define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) {
+; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
+ %cmp = icmp ne <2 x i32> %pop, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/compare-3way.ll b/llvm/test/Transforms/InstCombine/compare-3way.ll
new file mode 100644
index 00000000000..663d470df87
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/compare-3way.ll
@@ -0,0 +1,395 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @use(i32)
+
+; These 18 tests exercise all combinations of signed comparison
+; against each of the three values produced by a typical
+; 3-way compare function (-1, 0, 1).
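+; Each test builds the idiom
+;   %result = (%a == %b) ? 0 : ((%a < %b) ? -1 : 1)
+; and branches on a comparison of %result against one of those constants,
+; so the branch condition should fold to a single icmp of %a and %b (or to
+; a constant when it can never or always be true).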
+
+define void @test_low_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sgt
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_slt
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sge
+; CHECK: br i1 true, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sle
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_ne
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_eq
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sgt
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_slt
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sge
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sle
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_ne
+; CHECK: [[TMP1:%.*]] = icmp eq i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_eq
+; CHECK: icmp eq i64 %a, %b
+; CHECK: br i1 %eq, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sgt
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_slt
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sge
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sle
+; CHECK: br i1 true, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_ne
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_eq
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+; These five tests make sure we didn't accidentally hard-code one of the
+; produced values
+
+define void @non_standard_low(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_low
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -3
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_mid(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_mid
+; CHECK: icmp eq i64 %a, %b
+; CHECK: br i1 %eq, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -2
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_high(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_high
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_bound1(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_bound1
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -20
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_bound2(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_bound2
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/compare-alloca.ll b/llvm/test/Transforms/InstCombine/compare-alloca.ll
new file mode 100644
index 00000000000..414a07825f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/compare-alloca.ll
@@ -0,0 +1,97 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+target datalayout = "p:32:32"
+
+
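+; An alloca that does not escape cannot compare equal to a pointer that was
+; not derived from it, so these icmps should fold to a constant. The later
+; tests verify that the fold is blocked once the alloca escapes (through a
+; call or a store) or when more than one compare is involved.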
+define i1 @alloca_argument_compare(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_swapped
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_ne(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp ne i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_ne
+ ; CHECK: ret i1 true
+}
+
+define i1 @alloca_argument_compare_derived_ptrs(i64* %arg, i64 %x) {
+ %alloc = alloca i64, i64 8
+ %p = getelementptr i64, i64* %arg, i64 %x
+ %q = getelementptr i64, i64* %alloc, i64 3
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_derived_ptrs
+ ; CHECK: ret i1 false
+}
+
+declare void @escape(i64*)
+define i1 @alloca_argument_compare_escaped_alloca(i64* %arg) {
+ %alloc = alloca i64
+ call void @escape(i64* %alloc)
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_alloca
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @check_compares(i1, i1)
+define void @alloca_argument_compare_two_compares(i64* %p) {
+ %q = alloca i64, i64 8
+ %r = getelementptr i64, i64* %p, i64 1
+ %s = getelementptr i64, i64* %q, i64 2
+ %cmp1 = icmp eq i64* %p, %q
+ %cmp2 = icmp eq i64* %r, %s
+ call void @check_compares(i1 %cmp1, i1 %cmp2)
+ ret void
+ ; We will only fold if there is a single cmp.
+ ; CHECK-LABEL: alloca_argument_compare_two_compares
+ ; CHECK: call void @check_compares(i1 %cmp1, i1 %cmp2)
+}
+
+define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ %p = getelementptr i64, i64* %alloc, i64 1
+ store i64* %p, i64** %ptr
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_through_store
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+define i1 @alloca_argument_compare_benign_instrs(i8* %arg) {
+ %alloc = alloca i8
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %alloc)
+ %cmp = icmp eq i8* %arg, %alloc
+ %x = load i8, i8* %arg
+ store i8 %x, i8* %alloc
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %alloc)
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_benign_instrs
+ ; CHECK: ret i1 false
+}
+
+declare i64* @allocator()
+define i1 @alloca_call_compare() {
+ %p = alloca i64
+ %q = call i64* @allocator()
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_call_compare
+ ; CHECK: ret i1 false
+}
diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll
new file mode 100644
index 00000000000..c6c56f2361e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/compare-signs.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; PR5438
+
+define i32 @test1(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[DOTLOBIT:%.*]] = lshr i32 [[TMP1]], 31
+; CHECK-NEXT: [[DOTLOBIT_NOT:%.*]] = xor i32 [[DOTLOBIT]], 1
+; CHECK-NEXT: ret i32 [[DOTLOBIT_NOT]]
+;
+ %t0 = icmp sgt i32 %a, -1
+ %t1 = icmp slt i32 %b, 0
+ %t2 = xor i1 %t1, %t0
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+}
+
+; TODO: This optimizes partially but not all the way.
+define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT: [[DOTLOBIT:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[DOTLOBIT]], 1
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %t0 = and i32 %a, 8
+ %t1 = and i32 %b, 8
+ %t2 = icmp eq i32 %t0, %t1
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[T2_UNSHIFTED_LOBIT:%.*]] = lshr i32 [[T2_UNSHIFTED]], 31
+; CHECK-NEXT: [[T2_UNSHIFTED_LOBIT_NOT:%.*]] = xor i32 [[T2_UNSHIFTED_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[T2_UNSHIFTED_LOBIT_NOT]]
+;
+ %t0 = lshr i32 %a, 31
+ %t1 = lshr i32 %b, 31
+ %t2 = icmp eq i32 %t0, %t1
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
+}
+
+; TODO: this should optimize but doesn't, due to missing vector support in InstCombiner::foldICmpEquality.
+define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
+; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[T3]]
+;
+ %t0 = lshr <2 x i32> %a, <i32 31, i32 31>
+ %t1 = lshr <2 x i32> %b, <i32 31, i32 31>
+ %t2 = icmp eq <2 x i32> %t0, %t1
+ %t3 = zext <2 x i1> %t2 to <2 x i32>
+ ret <2 x i32> %t3
+}
+
+; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
+; is one, not zero.
+define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: @test3i(
+; CHECK-NEXT: [[T01:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[T01]], 31
+; CHECK-NEXT: [[T4:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %t0 = lshr i32 %a, 29
+ %t1 = lshr i32 %b, 29
+ %t2 = or i32 %t0, 35
+ %t3 = or i32 %t1, 35
+ %t4 = icmp eq i32 %t2, %t3
+ %t5 = zext i1 %t4 to i32
+ ret i32 %t5
+}
+
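+; The ashr/sub/lshr/or sequence below computes signum(a) in {-1, 0, 1}:
+; the ashr yields -1 for negative inputs and the lshr of the negation yields
+; 1 for positive inputs. "signum(a) s< 1" is therefore equivalent to "a s< 1".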
+define i1 @test4a(i32 %a) {
+; CHECK-LABEL: @test4a(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %l = ashr i32 %a, 31
+ %na = sub i32 0, %a
+ %r = lshr i32 %na, 31
+ %signum = or i32 %l, %r
+ %c = icmp slt i32 %signum, 1
+ ret i1 %c
+}
+
+define <2 x i1> @test4a_vec(<2 x i32> %a) {
+; CHECK-LABEL: @test4a_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %l = ashr <2 x i32> %a, <i32 31, i32 31>
+ %na = sub <2 x i32> zeroinitializer, %a
+ %r = lshr <2 x i32> %na, <i32 31, i32 31>
+ %signum = or <2 x i32> %l, %r
+ %c = icmp slt <2 x i32> %signum, <i32 1, i32 1>
+ ret <2 x i1> %c
+}
+
+define i1 @test4b(i64 %a) {
+; CHECK-LABEL: @test4b(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %c = icmp slt i64 %signum, 1
+ ret i1 %c
+}
+
+define i1 @test4c(i64 %a) {
+; CHECK-LABEL: @test4c(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %signum.trunc = trunc i64 %signum to i32
+ %c = icmp slt i32 %signum.trunc, 1
+ ret i1 %c
+}
+
+define <2 x i1> @test4c_vec(<2 x i64> %a) {
+; CHECK-LABEL: @test4c_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %l = ashr <2 x i64> %a, <i64 63, i64 63>
+ %na = sub <2 x i64> zeroinitializer, %a
+ %r = lshr <2 x i64> %na, <i64 63, i64 63>
+ %signum = or <2 x i64> %l, %r
+ %signum.trunc = trunc <2 x i64> %signum to <2 x i32>
+ %c = icmp slt <2 x i32> %signum.trunc, <i32 1, i32 1>
+ ret <2 x i1> %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/compare-udiv.ll b/llvm/test/Transforms/InstCombine/compare-udiv.ll
new file mode 100644
index 00000000000..a15d15feae1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/compare-udiv.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
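+; A comparison of an unsigned quotient with a known operand folds to a range
+; check on the other operand: e.g. "udiv %n, %d == 0" is equivalent to
+; "%d u> %n", and "udiv 64, %d == 0" to "%d u> 64".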
+define i1 @test1(i32 %n, i32 %d) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, %n
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 %n, %d
+ %cmp1 = icmp eq i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test1vec(<2 x i32> %n, <2 x i32> %d) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, %n
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> %n, %d
+ %cmp1 = icmp eq <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test2(i32 %d) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, 64
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 64, %d
+ %cmp1 = icmp eq i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test2vec(<2 x i32> %d) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, <i32 64, i32 63>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 64, i32 63>, %d
+ %cmp1 = icmp eq <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test3(i32 %n, i32 %d) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 %d, %n
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 %n, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test3vec(<2 x i32> %n, <2 x i32> %d) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ule <2 x i32> %d, %n
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> %n, %d
+ %cmp1 = icmp ne <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test4(i32 %d) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %d, 65
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 64, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test4vec(<2 x i32> %d) {
+; CHECK-LABEL: @test4vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> %d, <i32 65, i32 66>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 64, i32 65>, %d
+ %cmp1 = icmp ne <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test5(i32 %d) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i1 true
+;
+ %div = udiv i32 -1, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test5vec(<2 x i32> %d) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %div = udiv <2 x i32> <i32 -1, i32 -1>, %d
+ %cmp1 = icmp ne <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test6(i32 %d) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %d, 6
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 5, %d
+ %cmp1 = icmp ugt i32 %div, 0
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test6vec(<2 x i32> %d) {
+; CHECK-LABEL: @test6vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> %d, <i32 6, i32 6>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 5, i32 5>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, zeroinitializer
+ ret <2 x i1> %cmp1
+}
+
+; (icmp ugt (udiv C1, X), C1) -> false.
+define i1 @test7(i32 %d) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i1 false
+;
+ %div = udiv i32 8, %d
+ %cmp1 = icmp ugt i32 %div, 8
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test7vec(<2 x i32> %d) {
+; CHECK-LABEL: @test7vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %div = udiv <2 x i32> <i32 8, i32 8>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, <i32 8, i32 8>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test8(i32 %d) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %d, 2
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 3
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test8vec(<2 x i32> %d) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> %d, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, <i32 3, i32 3>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test9(i32 %d) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %d, 2
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 2
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test9vec(<2 x i32> %d) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> %d, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, <i32 2, i32 2>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test10(i32 %d) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %d, 3
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 1
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test10vec(<2 x i32> %d) {
+; CHECK-LABEL: @test10vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> %d, <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, <i32 1, i32 1>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test11(i32 %d) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, 4
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 1
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test11vec(<2 x i32> %d) {
+; CHECK-LABEL: @test11vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ult <2 x i32> %div, <i32 1, i32 1>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test12(i32 %d) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, 2
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 2
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test12vec(<2 x i32> %d) {
+; CHECK-LABEL: @test12vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ult <2 x i32> %div, <i32 2, i32 2>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test13(i32 %d) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, 1
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 3
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test13vec(<2 x i32> %d) {
+; CHECK-LABEL: @test13vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ult <2 x i32> %div, <i32 3, i32 3>
+ ret <2 x i1> %cmp1
+}
+
+define i1 @test14(i32 %d) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %d, 1
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 4
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test14vec(<2 x i32> %d) {
+; CHECK-LABEL: @test14vec(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i32> %d, <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP1]]
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ult <2 x i32> %div, <i32 4, i32 4>
+ ret <2 x i1> %cmp1
+}
+
+; icmp ugt X, UINT_MAX -> false.
+define i1 @test15(i32 %d) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: ret i1 false
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, -1
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test15vec(<2 x i32> %d) {
+; CHECK-LABEL: @test15vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ugt <2 x i32> %div, <i32 -1, i32 -1>
+ ret <2 x i1> %cmp1
+}
+
+; icmp ult X, UINT_MAX -> true.
+define i1 @test16(i32 %d) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i1 true
+;
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, -1
+ ret i1 %cmp1
+}
+
+define <2 x i1> @test16vec(<2 x i32> %d) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %div = udiv <2 x i32> <i32 4, i32 4>, %d
+ %cmp1 = icmp ult <2 x i32> %div, <i32 -1, i32 -1>
+ ret <2 x i1> %cmp1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/compare-unescaped.ll b/llvm/test/Transforms/InstCombine/compare-unescaped.ll
new file mode 100644
index 00000000000..d15fc2fd449
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/compare-unescaped.ll
@@ -0,0 +1,164 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@gp = global i32* null, align 8
+
+declare i8* @malloc(i64) #1
+
+define i1 @compare_global_trivialeq() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8
+ %cmp = icmp eq i32* %bc, %lgp
+ ret i1 %cmp
+; CHECK-LABEL: compare_global_trivialeq
+; CHECK: ret i1 false
+}
+
+define i1 @compare_global_trivialne() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8
+ %cmp = icmp ne i32* %bc, %lgp
+ ret i1 %cmp
+; CHECK-LABEL: compare_global_trivialne
+; CHECK: ret i1 true
+}
+
+
+; Although %m is marked nocapture in the deopt operand of the call to function f,
+; we cannot remove the allocation site (the call to malloc).
+; The comparison should fold to false irrespective of whether the call to malloc can be elided or not.
+declare void @f()
+define i1 @compare_and_call_with_deopt() {
+; CHECK-LABEL: compare_and_call_with_deopt
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp eq i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+; Same function as above with deopt operand in function f, but the comparison is NE.
+define i1 @compare_ne_and_call_with_deopt() {
+; CHECK-LABEL: compare_ne_and_call_with_deopt
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp ne i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+; Same function as above, but the global is not marked nonnull, so we cannot fold the comparison.
+define i1 @compare_ne_global_maybe_null() {
+; CHECK-LABEL: compare_ne_global_maybe_null
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp
+ %cmp = icmp ne i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 %cmp
+}
+
+; FIXME: The comparison should fold to false since %m escapes (call to function escape)
+; after the comparison.
+declare void @escape(i8*)
+define i1 @compare_and_call_after() {
+; CHECK-LABEL: compare_and_call_after
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp eq i32* %bc, %lgp
+ br i1 %cmp, label %escape_call, label %just_return
+
+escape_call:
+ call void @escape(i8* %m)
+ ret i1 true
+
+just_return:
+ ret i1 %cmp
+}
+
+define i1 @compare_distinct_mallocs() {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ %cmp = icmp eq i8* %m, %n
+ ret i1 %cmp
+ ; CHECK-LABEL: compare_distinct_mallocs
+ ; CHECK: ret i1 false
+}
+
+; The compare is folded to true since the comparison folding looks through bitcasts.
+; The call to malloc and the bitcast instructions are then elided since there are no remaining uses of the malloc.
+define i1 @compare_samepointer_under_bitcast() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %bcback = bitcast i32* %bc to i8*
+ %cmp = icmp eq i8* %m, %bcback
+ ret i1 %cmp
+; CHECK-LABEL: compare_samepointer_under_bitcast
+; CHECK: ret i1 true
+}
+
+; The compare is folded to true since the comparison folding looks through bitcasts.
+; The malloc call for %m cannot be elided since it is used in the call to function f.
+define i1 @compare_samepointer_escaped() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %bcback = bitcast i32* %bc to i8*
+ %cmp = icmp eq i8* %m, %bcback
+ call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK-LABEL: compare_samepointer_escaped
+; CHECK-NEXT: %m = call i8* @malloc(i64 4)
+; CHECK-NEXT: call void @f() [ "deopt"(i8* %m) ]
+; CHECK: ret i1 true
+}
+
+; Technically, we can fold the %cmp2 comparison even though %m escapes through
+; the ret statement, since `ret` terminates the function and the cmp cannot be
+; reached from the ret.
+; FIXME: Folding this %cmp2 when %m escapes through the ret could be an issue with
+; cross-thread data dependencies, since we do not distinguish between
+; atomic and non-atomic loads in capture tracking.
+define i8* @compare_ret_escape(i8* %c) {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ %cmp = icmp eq i8* %n, %c
+ br i1 %cmp, label %retst, label %chk
+
+retst:
+ ret i8* %m
+
+chk:
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp2 = icmp eq i32* %bc, %lgp
+ br i1 %cmp2, label %retst, label %chk2
+
+chk2:
+ ret i8* %n
+; CHECK-LABEL: compare_ret_escape
+; CHECK: %cmp = icmp eq i8* %n, %c
+; CHECK: %cmp2 = icmp eq i32* %lgp, %bc
+}
+
+; The malloc call for %m cannot be elided since it is used in the call to function f.
+; However, the cmp can be folded to true as %n doesn't escape and %m and %n are distinct allocations.
+define i1 @compare_distinct_pointer_escape() {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ tail call void @f() [ "deopt"(i8* %m) ]
+ %cmp = icmp ne i8* %m, %n
+ ret i1 %cmp
+; CHECK-LABEL: compare_distinct_pointer_escape
+; CHECK-NEXT: %m = call i8* @malloc(i64 4)
+; CHECK-NEXT: tail call void @f() [ "deopt"(i8* %m) ]
+; CHECK-NEXT: ret i1 true
+}
+
+!0 = !{}
diff --git a/llvm/test/Transforms/InstCombine/consecutive-fences.ll b/llvm/test/Transforms/InstCombine/consecutive-fences.ll
new file mode 100644
index 00000000000..2b8a8e72a62
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/consecutive-fences.ll
@@ -0,0 +1,73 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+; Make sure we collapse the fences in this case
+
+; CHECK-LABEL: define void @tinkywinky
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence syncscope("singlethread") acquire
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+define void @tinkywinky() {
+ fence seq_cst
+ fence seq_cst
+ fence seq_cst
+ fence syncscope("singlethread") acquire
+ fence syncscope("singlethread") acquire
+ fence syncscope("singlethread") acquire
+ ret void
+}
+
+; CHECK-LABEL: define void @dipsy
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence syncscope("singlethread") seq_cst
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+define void @dipsy() {
+ fence seq_cst
+ fence syncscope("singlethread") seq_cst
+ ret void
+}
+
+; CHECK-LABEL: define void @patatino
+; CHECK-NEXT: fence acquire
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: fence acquire
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+define void @patatino() {
+ fence acquire
+ fence seq_cst
+ fence acquire
+ fence seq_cst
+ ret void
+}
+
+; CHECK-LABEL: define void @debug
+; CHECK-NOT: fence
+; CHECK: call void @llvm.dbg.value
+; CHECK: fence seq_cst
+define void @debug() {
+ fence seq_cst
+ tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+ fence seq_cst
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "Me", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: null, retainedTypes: null, imports: null)
+!1 = !DILocalVariable(name: "", arg: 1, scope: !2, file: null, line: 1, type: null)
+!2 = distinct !DISubprogram(name: "debug", linkageName: "debug", scope: null, file: null, line: 0, type: null, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!3 = !DIFile(filename: "consecutive-fences.ll", directory: "")
+!5 = !{i32 2, !"Dwarf Version", i32 4}
+!6 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !{i32 1, !"wchar_size", i32 4}
+!8 = !{i32 7, !"PIC Level", i32 2}
+!9 = !DILocation(line: 0, column: 0, scope: !2)
diff --git a/llvm/test/Transforms/InstCombine/constant-expr-datalayout.ll b/llvm/test/Transforms/InstCombine/constant-expr-datalayout.ll
new file mode 100644
index 00000000000..cdecfc91f59
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-expr-datalayout.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%test1.struct = type { i32, i32 }
+@test1.aligned_glbl = global %test1.struct zeroinitializer, align 4
+define void @test1(i64 *%ptr) {
+ store i64 and (i64 ptrtoint (i32* getelementptr (%test1.struct, %test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), i64 3), i64* %ptr
+; CHECK: store i64 0, i64* %ptr
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
new file mode 100644
index 00000000000..b879b1a2f8f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -0,0 +1,241 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+@g = addrspace(3) global i32 89
+
+@const_zero_i8_as1 = addrspace(1) constant i8 0
+@const_zero_i32_as1 = addrspace(1) constant i32 0
+
+@const_zero_i8_as2 = addrspace(2) constant i8 0
+@const_zero_i32_as2 = addrspace(2) constant i32 0
+
+@const_zero_i8_as3 = addrspace(3) constant i8 0
+@const_zero_i32_as3 = addrspace(3) constant i32 0
+
+; Test constant folding of inttoptr (ptrtoint constantexpr)
+; The intermediate integer size is the same as the pointer size
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size(
+; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3
+ %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+ %y = inttoptr i32 %x to i32 addrspace(3)*
+ ret i32 addrspace(3)* %y
+}
+
+; The intermediate integer size is larger than the pointer size
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller(
+; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2
+ %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16
+ %y = inttoptr i16 %x to i32 addrspace(2)*
+ ret i32 addrspace(2)* %y
+}
+
+; The two address spaces have the same size, but they are different,
+; so nothing should happen.
+define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as(
+; CHECK-NEXT: ret i32 addrspace(4)* inttoptr (i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*)
+ %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16
+ %y = inttoptr i16 %x to i32 addrspace(4)*
+ ret i32 addrspace(4)* %y
+}
+
+; Make sure we don't introduce a bitcast between different sized
+; address spaces when folding this
+define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as(
+; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*)
+ %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
+ %y = inttoptr i32 %x to i32 addrspace(2)*
+ ret i32 addrspace(2)* %y
+}
+
+; The intermediate integer size is too small, so nothing should happen.
+define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() {
+; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*)
+ %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8
+ %y = inttoptr i8 %x to i32 addrspace(3)*
+ ret i32 addrspace(3)* %y
+}
+
+define i8 @const_fold_ptrtoint() {
+; CHECK-LABEL: @const_fold_ptrtoint(
+; CHECK-NEXT: ret i8 4
+ ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8)
+}
+
+; Test that masking happens when the destination pointer is smaller than
+; the original integer.
+define i8 @const_fold_ptrtoint_mask() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask(
+; CHECK-NEXT: ret i8 1
+ ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8)
+}
+
+; Address space 0 is too small for the correct mask; we should mask with
+; 64 bits instead of 32.
+define i64 @const_fold_ptrtoint_mask_small_as0() {
+; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0(
+; CHECK: ret i64 -1
+ ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 addrspace(1)*) to i64)
+}
+
+define i32 addrspace(3)* @const_inttoptr() {
+; CHECK-LABEL: @const_inttoptr(
+; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*)
+ %p = inttoptr i16 4 to i32 addrspace(3)*
+ ret i32 addrspace(3)* %p
+}
+
+define i16 @const_ptrtoint() {
+; CHECK-LABEL: @const_ptrtoint(
+; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16)
+ %i = ptrtoint i32 addrspace(3)* @g to i16
+ ret i16 %i
+}
+
+define i16 @const_inttoptr_ptrtoint() {
+; CHECK-LABEL: @const_inttoptr_ptrtoint(
+; CHECK-NEXT: ret i16 9
+ ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16)
+}
+
+define i1 @constant_fold_cmp_constantexpr_inttoptr() {
+; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr(
+; CHECK-NEXT: ret i1 true
+ %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null
+ ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr_null(i16 %i) {
+; CHECK-LABEL: @constant_fold_inttoptr_null(
+; CHECK-NEXT: ret i1 false
+ %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*)
+ ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null(
+; CHECK-NEXT: ret i1 icmp eq (i32 addrspace(3)* @g, i32 addrspace(3)* null)
+ %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16)
+ ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint_null_2() {
+; CHECK-LABEL: @constant_fold_ptrtoint_null_2(
+; CHECK-NEXT: ret i1 icmp eq (i32 addrspace(3)* @g, i32 addrspace(3)* null)
+ %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+ ret i1 %x
+}
+
+define i1 @constant_fold_ptrtoint() {
+; CHECK-LABEL: @constant_fold_ptrtoint(
+; CHECK-NEXT: ret i1 true
+ %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+ ret i1 %x
+}
+
+define i1 @constant_fold_inttoptr() {
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK-NEXT: ret i1 false
+ %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*)
+ ret i1 %x
+}
+
+@g_float_as3 = addrspace(3) global float zeroinitializer
+@g_v4f_as3 = addrspace(3) global <4 x float> zeroinitializer
+
+define float @constant_fold_bitcast_ftoi_load() {
+; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
+; CHECK: load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+ %a = load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+ ret float %a
+}
+
+define i32 @constant_fold_bitcast_itof_load() {
+; CHECK-LABEL: @constant_fold_bitcast_itof_load(
+; CHECK: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+ %a = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+ ret i32 %a
+}
+
+define <4 x float> @constant_fold_bitcast_vector_as() {
+; CHECK-LABEL: @constant_fold_bitcast_vector_as(
+; CHECK: load <4 x float>, <4 x float> addrspace(3)* @g_v4f_as3, align 16
+ %a = load <4 x float>, <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+ ret <4 x float> %a
+}
+
+@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
+
+define i32 @test_cast_gep_small_indices_as() {
+; CHECK-LABEL: @test_cast_gep_small_indices_as(
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+ %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
+ %x = load i32, i32 addrspace(3)* %p, align 4
+ ret i32 %x
+}
+
+%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* }
+
+@constant_fold_global_ptr = addrspace(3) global %struct.foo {
+ float 0.0,
+ float 0.0,
+ [4 x i32] zeroinitializer,
+ i32 addrspace(3)* getelementptr ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0)
+}
+
+define i32 @test_cast_gep_large_indices_as() {
+; CHECK-LABEL: @test_cast_gep_large_indices_as(
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+ %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
+ %x = load i32, i32 addrspace(3)* %p, align 4
+ ret i32 %x
+}
+
+define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+ %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
+ %y = load i32, i32 addrspace(3)* %x, align 4
+ ret i32 %y
+}
+
+@constant_data_as3 = addrspace(3) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
+
+define i32 @test_read_data_from_global_as3() {
+; CHECK-LABEL: @test_read_data_from_global_as3(
+; CHECK-NEXT: ret i32 2
+ %x = getelementptr [5 x i32], [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
+ %y = load i32, i32 addrspace(3)* %x, align 4
+ ret i32 %y
+}
+
+@a = addrspace(1) constant i32 9
+@b = addrspace(1) constant i32 23
+@c = addrspace(1) constant i32 34
+@d = addrspace(1) constant i32 99
+
+@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d]
+@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*], [4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2)
+
+define i32 @constant_through_array_as_ptrs() {
+; CHECK-LABEL: @constant_through_array_as_ptrs(
+; CHECK-NEXT: ret i32 34
+ %p = load i32 addrspace(1)* addrspace(2)*, i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
+ %a = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %p, align 4
+ %b = load i32, i32 addrspace(1)* %a, align 4
+ ret i32 %b
+}
+
+@shared_mem = external addrspace(3) global [0 x i8]
+
+define float @canonicalize_addrspacecast(i32 %i) {
+; CHECK-LABEL: @canonicalize_addrspacecast
+; CHECK-NEXT: getelementptr inbounds float, float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 %i
+ %p = getelementptr inbounds float, float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i
+ %v = load float, float* %p
+ ret float %v
+}
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-alias.ll b/llvm/test/Transforms/InstCombine/constant-fold-alias.ll
new file mode 100644
index 00000000000..810687255f6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
+
+@G1 = global i32 42, align 1
+@G2 = global i32 42
+@G3 = global [4 x i8] zeroinitializer, align 1
+
+@A1 = alias i32, bitcast (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias i32, inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+
+define i64 @f1() {
+; This cannot be constant folded because G1 is underaligned.
+; CHECK-LABEL: @f1(
+; CHECK: ret i64 and
+ ret i64 and (i64 ptrtoint (i32* @G1 to i64), i64 1)
+}
+
+define i64 @f2() {
+; The preferred alignment for G2 allows this one to fold to zero.
+; CHECK-LABEL: @f2(
+; CHECK: ret i64 0
+ ret i64 and (i64 ptrtoint (i32* @G2 to i64), i64 1)
+}
+
+define i64 @g1() {
+; This cannot be constant folded because A1 aliases G3, which is underaligned.
+; CHECK-LABEL: @g1(
+; CHECK: ret i64 and
+ ret i64 and (i64 ptrtoint (i32* @A1 to i64), i64 1)
+}
+
+define i64 @g2() {
+; While A2 also aliases G3, which is underaligned, the math of A2 forces a
+; certain alignment, allowing this to fold to zero.
+; CHECK-LABEL: @g2(
+; CHECK: ret i64 0
+ ret i64 and (i64 ptrtoint (i32* @A2 to i64), i64 1)
+}
+
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-compare.ll b/llvm/test/Transforms/InstCombine/constant-fold-compare.ll
new file mode 100644
index 00000000000..6e41e2f6802
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-compare.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+define i32 @a() nounwind readnone {
+entry:
+ ret i32 zext (i1 icmp eq (i32 0, i32 ptrtoint (i32 ()* @a to i32)) to i32)
+}
+; CHECK: ret i32 0
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-gep.ll b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
new file mode 100644
index 00000000000..77090529e13
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+
+; Constant folding should fix notionally out-of-bounds indices
+; and add inbounds keywords.
+
+%struct.X = type { [3 x i32], [3 x i32] }
+
+@Y = internal global [3 x %struct.X] zeroinitializer
+
+define void @frob() {
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 16
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 3), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 4), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 2), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 5), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 6), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 7), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 8), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 0), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 9), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 10), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 11), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 14), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 16), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
+ store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
+ ret void
+}
+
+
+; PR8883 - Constant fold exotic gep subtract
+; CHECK-LABEL: @test2(
+@X = global [1000 x i8] zeroinitializer, align 16
+
+define i64 @test2() {
+entry:
+ %A = bitcast i8* getelementptr inbounds ([1000 x i8], [1000 x i8]* @X, i64 1, i64 0) to i8*
+ %B = bitcast i8* getelementptr inbounds ([1000 x i8], [1000 x i8]* @X, i64 0, i64 0) to i8*
+
+ %B2 = ptrtoint i8* %B to i64
+ %C = sub i64 0, %B2
+ %D = getelementptr i8, i8* %A, i64 %C
+ %E = ptrtoint i8* %D to i64
+
+ ret i64 %E
+ ; CHECK: ret i64 1000
+}
+
+@X_as1 = addrspace(1) global [1000 x i8] zeroinitializer, align 16
+
+define i16 @test2_as1() {
+; CHECK-LABEL: @test2_as1(
+ ; CHECK: ret i16 1000
+
+entry:
+ %A = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8], [1000 x i8] addrspace(1)* @X_as1, i64 1, i64 0) to i8 addrspace(1)*
+ %B = bitcast i8 addrspace(1)* getelementptr inbounds ([1000 x i8], [1000 x i8] addrspace(1)* @X_as1, i64 0, i64 0) to i8 addrspace(1)*
+
+ %B2 = ptrtoint i8 addrspace(1)* %B to i16
+ %C = sub i16 0, %B2
+ %D = getelementptr i8, i8 addrspace(1)* %A, i16 %C
+ %E = ptrtoint i8 addrspace(1)* %D to i16
+
+ ret i16 %E
+}
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
new file mode 100644
index 00000000000..e1b692173ce
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S -debug 2>&1 | FileCheck %s
+; REQUIRES: asserts
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+define i32 @a() nounwind readnone {
+entry:
+ ret i32 zext (i1 icmp eq (i32 0, i32 ptrtoint (i32 ()* @a to i32)) to i32)
+}
+; CHECK: INSTCOMBINE ITERATION #1
+; CHECK-NOT: INSTCOMBINE ITERATION #2
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-libfunc.ll b/llvm/test/Transforms/InstCombine/constant-fold-libfunc.ll
new file mode 100644
index 00000000000..5d1aa821ea1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-libfunc.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare double @acos(double)
+
+; Check that functions without any function attributes are simplified.
+
+define double @test_simplify_acos() {
+; CHECK-LABEL: @test_simplify_acos
+ %pi = call double @acos(double -1.000000e+00)
+; CHECK-NOT: call double @acos
+; CHECK: ret double 0x400921FB54442D18
+ ret double %pi
+}
+
+; Check that we don't constant fold calls marked nobuiltin.
+
+define double @test_acos_nobuiltin() {
+; CHECK-LABEL: @test_acos_nobuiltin
+ %pi = call double @acos(double -1.000000e+00) nobuiltin
+; CHECK: call double @acos(double -1.000000e+00)
+ ret double %pi
+}
+
+; Check that we don't constant fold strictfp results that require rounding.
+
+define double @test_acos_strictfp() {
+; CHECK-LABEL: @test_acos_strictfp
+ %pi = call double @acos(double -1.000000e+00) strictfp
+; CHECK: call double @acos(double -1.000000e+00)
+ ret double %pi
+}
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-math.ll b/llvm/test/Transforms/InstCombine/constant-fold-math.ll
new file mode 100644
index 00000000000..27578387f82
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-math.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float) #0
+declare float @llvm.fmuladd.f32(float, float, float) #0
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+
+declare double @llvm.fma.f64(double, double, double) #0
+declare double @llvm.fmuladd.f64(double, double, double) #0
+
+declare double @llvm.sqrt.f64(double) #0
+
+
+; CHECK-LABEL: @constant_fold_fma_f32
+; CHECK-NEXT: ret float 6.000000e+00
+define float @constant_fold_fma_f32() #0 {
+ %x = call float @llvm.fma.f32(float 1.0, float 2.0, float 4.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_fma_v4f32
+; CHECK-NEXT: ret <4 x float> <float 1.200000e+01, float 1.400000e+01, float 1.600000e+01, float 1.800000e+01>
+define <4 x float> @constant_fold_fma_v4f32() #0 {
+ %x = call <4 x float> @llvm.fma.v4f32(<4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>)
+ ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_fmuladd_f32
+; CHECK-NEXT: ret float 6.000000e+00
+define float @constant_fold_fmuladd_f32() #0 {
+ %x = call float @llvm.fmuladd.f32(float 1.0, float 2.0, float 4.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_fma_f64
+; CHECK-NEXT: ret double 6.000000e+00
+define double @constant_fold_fma_f64() #0 {
+ %x = call double @llvm.fma.f64(double 1.0, double 2.0, double 4.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_fmuladd_f64
+; CHECK-NEXT: ret double 6.000000e+00
+define double @constant_fold_fmuladd_f64() #0 {
+ %x = call double @llvm.fmuladd.f64(double 1.0, double 2.0, double 4.0) #0
+ ret double %x
+}
+
+; PR32177
+
+; CHECK-LABEL: @constant_fold_frem_f32
+; CHECK-NEXT: ret float 0x41A61B2000000000
+define float @constant_fold_frem_f32() #0 {
+ %x = frem float 0x43cbfcd960000000, 0xc1e2b34a00000000
+ ret float %x
+}
+
+; PR3316
+
+; CHECK-LABEL: @constant_fold_frem_f64
+; CHECK-NEXT: ret double 0.000000e+00
+define double @constant_fold_frem_f64() {
+ %x = frem double 0x43E0000000000000, 1.000000e+00
+ ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-shifts.ll b/llvm/test/Transforms/InstCombine/constant-fold-shifts.ll
new file mode 100644
index 00000000000..1a5e0c35f51
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-fold-shifts.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@A = external constant i32
+
+; OSS-Fuzz #14169
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=14169
+define void @ossfuzz_14169_test1(i32* %a0) {
+; CHECK-LABEL: @ossfuzz_14169_test1(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret void
+;
+bb:
+ %B = ptrtoint i32* @A to i64
+ %C = icmp sge i64 %B, 0
+ %X = select i1 %C, i712 0, i712 1
+ %B9 = lshr i712 %X, 146783911423364576743092537299333564210980159306769991919205685720763064069663027716481187399048043939495936
+ %G5 = getelementptr i64, i64* undef, i712 %B9
+ store i64* %G5, i64** undef
+ ret void
+}
+
+define void @ossfuzz_14169_test2(i32* %a0) {
+; CHECK-LABEL: @ossfuzz_14169_test2(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret void
+;
+bb:
+ %B = ptrtoint i32* @A to i64
+ %C = icmp sge i64 %B, 0
+ %X = select i1 %C, i712 0, i712 1
+ %B9 = shl i712 %X, 146783911423364576743092537299333564210980159306769991919205685720763064069663027716481187399048043939495936
+ %G5 = getelementptr i64, i64* undef, i712 %B9
+ store i64* %G5, i64** undef
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/convergent.ll b/llvm/test/Transforms/InstCombine/convergent.ll
new file mode 100644
index 00000000000..1791de70875
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/convergent.ll
@@ -0,0 +1,44 @@
+; RUN: opt -instcombine -S < %s | FileCheck -enable-var-scope %s
+
+declare i32 @k() convergent
+declare i32 @f()
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+
+define i32 @extern() {
+ ; Convergent attr shouldn't be removed here; k is convergent.
+ ; CHECK: call i32 @k() [[$CONVERGENT_ATTR:#[0-9]+]]
+ %a = call i32 @k() convergent
+ ret i32 %a
+}
+
+define i32 @extern_no_attr() {
+ ; Convergent attr shouldn't be added here, even though k is convergent.
+ ; CHECK: call i32 @k(){{$}}
+ %a = call i32 @k()
+ ret i32 %a
+}
+
+define i32 @no_extern() {
+  ; Convergent should be removed here, as the target is not convergent.
+ ; CHECK: call i32 @f(){{$}}
+ %a = call i32 @f() convergent
+ ret i32 %a
+}
+
+define i32 @indirect_call(i32 ()* %f) {
+ ; CHECK: call i32 %f() [[$CONVERGENT_ATTR]]
+ %a = call i32 %f() convergent
+ ret i32 %a
+}
+
+; Do not remove the convergent attribute from convergent intrinsic call sites.
+; CHECK-LABEL: @convergent_intrinsic_call(
+; CHECK: call i64 @llvm.read_register.i64(metadata !0) [[$CONVERGENT_ATTR]]
+define i64 @convergent_intrinsic_call() {
+ %val = call i64 @llvm.read_register.i64(metadata !0) convergent
+ ret i64 %val
+}
+
+; CHECK: [[$CONVERGENT_ATTR]] = { convergent }
+!0 = !{!"foo"}
diff --git a/llvm/test/Transforms/InstCombine/copysign.ll b/llvm/test/Transforms/InstCombine/copysign.ll
new file mode 100644
index 00000000000..556b79999b0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/copysign.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.copysign.f32(float, float) #0
+declare double @llvm.copysign.f64(double, double) #0
+
+; CHECK-LABEL: @constant_fold_copysign_f32_01
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_copysign_f32_01() #0 {
+ %x = call float @llvm.copysign.f32(float 1.0, float -2.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f32_02
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_copysign_f32_02() #0 {
+ %x = call float @llvm.copysign.f32(float -2.0, float 1.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f32_03
+; CHECK-NEXT: ret float -2.000000e+00
+define float @constant_fold_copysign_f32_03() #0 {
+ %x = call float @llvm.copysign.f32(float -2.0, float -1.0) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_01
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_copysign_f64_01() #0 {
+ %x = call double @llvm.copysign.f64(double 1.0, double -2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_02
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_copysign_f64_02() #0 {
+ %x = call double @llvm.copysign.f64(double -1.0, double 2.0) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_copysign_f64_03
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_copysign_f64_03() #0 {
+ %x = call double @llvm.copysign.f64(double -1.0, double -2.0) #0
+ ret double %x
+}
+
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/cos-1.ll b/llvm/test/Transforms/InstCombine/cos-1.ll
new file mode 100644
index 00000000000..50db2a98e83
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cos-1.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefixes=ANY,NO-FLOAT-SHRINK
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s --check-prefixes=ANY,DO-FLOAT-SHRINK
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare double @cos(double)
+declare double @llvm.cos.f64(double)
+declare float @cosf(float)
+declare float @llvm.cos.f32(float)
+
+declare double @sin(double)
+declare double @llvm.sin.f64(double)
+declare float @sinf(float)
+declare float @llvm.sin.f32(float)
+
+declare double @tan(double)
+declare fp128 @tanl(fp128)
+
+; cos(-x) -> cos(x);
+
+define double @cos_negated_arg(double %x) {
+; ANY-LABEL: @cos_negated_arg(
+; ANY-NEXT: [[COS:%.*]] = call double @cos(double [[X:%.*]])
+; ANY-NEXT: ret double [[COS]]
+;
+ %neg = fsub double -0.0, %x
+ %r = call double @cos(double %neg)
+ ret double %r
+}
+
+define float @cosf_negated_arg(float %x) {
+; ANY-LABEL: @cosf_negated_arg(
+; ANY-NEXT: [[COS:%.*]] = call float @cosf(float [[X:%.*]])
+; ANY-NEXT: ret float [[COS]]
+;
+ %neg = fsub float -0.0, %x
+ %r = call float @cosf(float %neg)
+ ret float %r
+}
+
+define float @cosf_negated_arg_FMF(float %x) {
+; ANY-LABEL: @cosf_negated_arg_FMF(
+; ANY-NEXT: [[COS:%.*]] = call reassoc nnan float @cosf(float [[X:%.*]])
+; ANY-NEXT: ret float [[COS]]
+;
+ %neg = fsub float -0.0, %x
+ %r = call nnan reassoc float @cosf(float %neg)
+ ret float %r
+}
+
+; sin(-x) -> -sin(x);
+
+define double @sin_negated_arg(double %x) {
+; ANY-LABEL: @sin_negated_arg(
+; ANY-NEXT: [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; ANY-NEXT: ret double [[TMP2]]
+;
+ %neg = fsub double -0.0, %x
+ %r = call double @sin(double %neg)
+ ret double %r
+}
+
+define float @sinf_negated_arg(float %x) {
+; ANY-LABEL: @sinf_negated_arg(
+; ANY-NEXT: [[TMP1:%.*]] = call float @sinf(float [[X:%.*]])
+; ANY-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; ANY-NEXT: ret float [[TMP2]]
+;
+ %neg = fsub float -0.0, %x
+ %r = call float @sinf(float %neg)
+ ret float %r
+}
+
+define float @sinf_negated_arg_FMF(float %x) {
+; ANY-LABEL: @sinf_negated_arg_FMF(
+; ANY-NEXT: [[TMP1:%.*]] = call nnan afn float @sinf(float [[X:%.*]])
+; ANY-NEXT: [[TMP2:%.*]] = fsub nnan afn float -0.000000e+00, [[TMP1]]
+; ANY-NEXT: ret float [[TMP2]]
+;
+ %neg = fsub ninf float -0.0, %x
+ %r = call afn nnan float @sinf(float %neg)
+ ret float %r
+}
+
+declare void @use(double)
+
+define double @sin_negated_arg_extra_use(double %x) {
+; ANY-LABEL: @sin_negated_arg_extra_use(
+; ANY-NEXT: [[NEG:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
+; ANY-NEXT: [[R:%.*]] = call double @sin(double [[NEG]])
+; ANY-NEXT: call void @use(double [[NEG]])
+; ANY-NEXT: ret double [[R]]
+;
+ %neg = fsub double -0.0, %x
+ %r = call double @sin(double %neg)
+ call void @use(double %neg)
+ ret double %r
+}
+
+; -sin(-x) --> sin(x)
+; PR38458: https://bugs.llvm.org/show_bug.cgi?id=38458
+
+define double @neg_sin_negated_arg(double %x) {
+; ANY-LABEL: @neg_sin_negated_arg(
+; ANY-NEXT: [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT: ret double [[TMP1]]
+;
+ %neg = fsub double -0.0, %x
+ %r = call double @sin(double %neg)
+ %rn = fsub double -0.0, %r
+ ret double %rn
+}
+
+; tan(-x) -> -tan(x);
+
+define double @tan_negated_arg(double %x) {
+; ANY-LABEL: @tan_negated_arg(
+; ANY-NEXT: [[TMP1:%.*]] = call double @tan(double [[X:%.*]])
+; ANY-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; ANY-NEXT: ret double [[TMP2]]
+;
+ %neg = fsub double -0.0, %x
+ %r = call double @tan(double %neg)
+ ret double %r
+}
+
+; tanl(-x) -> -tanl(x);
+
+define fp128 @tanl_negated_arg(fp128 %x) {
+; ANY-LABEL: @tanl_negated_arg(
+; ANY-NEXT: [[TMP1:%.*]] = call fp128 @tanl(fp128 [[X:%.*]])
+; ANY-NEXT: [[TMP2:%.*]] = fsub fp128 0xL00000000000000008000000000000000, [[TMP1]]
+; ANY-NEXT: ret fp128 [[TMP2]]
+;
+ %neg = fsub fp128 0xL00000000000000008000000000000000, %x
+ %r = call fp128 @tanl(fp128 %neg)
+ ret fp128 %r
+}
+
+define float @negated_and_shrinkable_libcall(float %f) {
+; NO-FLOAT-SHRINK-LABEL: @negated_and_shrinkable_libcall(
+; NO-FLOAT-SHRINK-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT: [[COS1:%.*]] = call double @cos(double [[CONV1]])
+; NO-FLOAT-SHRINK-NEXT: [[CONV2:%.*]] = fptrunc double [[COS1]] to float
+; NO-FLOAT-SHRINK-NEXT: ret float [[CONV2]]
+;
+; DO-FLOAT-SHRINK-LABEL: @negated_and_shrinkable_libcall(
+; DO-FLOAT-SHRINK-NEXT: [[COSF:%.*]] = call float @cosf(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT: ret float [[COSF]]
+;
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.0, %conv1
+ %cos = call double @cos(double %neg)
+ %conv2 = fptrunc double %cos to float
+ ret float %conv2
+}
+
+; TODO: It was ok to shrink the libcall, so the intrinsic should shrink too?
+
+define float @negated_and_shrinkable_intrinsic(float %f) {
+; ANY-LABEL: @negated_and_shrinkable_intrinsic(
+; ANY-NEXT: [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; ANY-NEXT: [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
+; ANY-NEXT: [[CONV2:%.*]] = fptrunc double [[COS]] to float
+; ANY-NEXT: ret float [[CONV2]]
+;
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.0, %conv1
+ %cos = call double @llvm.cos.f64(double %neg)
+ %conv2 = fptrunc double %cos to float
+ ret float %conv2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/cos-2.ll b/llvm/test/Transforms/InstCombine/cos-2.ll
new file mode 100644
index 00000000000..a85cc8fa6bd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cos-2.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare float @cos(double)
+declare signext i8 @sqrt(...)
+
+; Check that functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %d) {
+; CHECK-LABEL: @test_no_simplify1(
+ %neg = fsub double -0.000000e+00, %d
+ %cos = call float @cos(double %neg)
+; CHECK: call float @cos(double %neg)
+ ret float %cos
+}
+
+
+define i8 @bogus_sqrt() {
+ %fake_sqrt = call signext i8 (...) @sqrt()
+ ret i8 %fake_sqrt
+
+; CHECK-LABEL: bogus_sqrt(
+; CHECK-NEXT: %fake_sqrt = call signext i8 (...) @sqrt()
+; CHECK-NEXT: ret i8 %fake_sqrt
+}
+
diff --git a/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll b/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll
new file mode 100644
index 00000000000..ef5513d086e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare double @llvm.cos.f64(double %Val)
+declare float @llvm.cos.f32(float %Val)
+declare <2 x float> @llvm.cos.v2f32(<2 x float> %Val)
+
+declare float @llvm.fabs.f32(float %Val)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> %Val)
+
+define double @undef_arg() {
+; CHECK-LABEL: @undef_arg(
+; CHECK-NEXT: ret double 0.000000e+00
+;
+ %r = call double @llvm.cos.f64(double undef)
+ ret double %r
+}
+
+define float @undef_arg2(float %d) {
+; CHECK-LABEL: @undef_arg2(
+; CHECK-NEXT: [[COSVAL:%.*]] = call float @llvm.cos.f32(float [[D:%.*]])
+; CHECK-NEXT: [[FSUM:%.*]] = fadd float [[COSVAL]], 0.000000e+00
+; CHECK-NEXT: ret float [[FSUM]]
+;
+ %cosval = call float @llvm.cos.f32(float %d)
+ %cosval2 = call float @llvm.cos.f32(float undef)
+ %fsum = fadd float %cosval2, %cosval
+ ret float %fsum
+}
+
+define float @fneg_f32(float %x) {
+; CHECK-LABEL: @fneg_f32(
+; CHECK-NEXT: [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[COS]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %cos = call float @llvm.cos.f32(float %x.fneg)
+ ret float %cos
+}
+
+define <2 x float> @fneg_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @fneg_v2f32(
+; CHECK-NEXT: [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: ret <2 x float> [[COS]]
+;
+ %x.fneg = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %cos = call <2 x float> @llvm.cos.v2f32(<2 x float> %x.fneg)
+ ret <2 x float> %cos
+}
+
+; FMF are not required, but they should propagate.
+
+define <2 x float> @fneg_cos_fmf(<2 x float> %x){
+; CHECK-LABEL: @fneg_cos_fmf(
+; CHECK-NEXT: [[R:%.*]] = call nnan afn <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negx = fsub fast <2 x float> <float -0.0, float -0.0>, %x
+ %r = call nnan afn <2 x float> @llvm.cos.v2f32(<2 x float> %negx)
+ ret <2 x float> %r
+}
+
+define float @fabs_f32(float %x) {
+; CHECK-LABEL: @fabs_f32(
+; CHECK-NEXT: [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[COS]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %cos = call float @llvm.cos.f32(float %x.fabs)
+ ret float %cos
+}
+
+define float @fabs_fneg_f32(float %x) {
+; CHECK-LABEL: @fabs_fneg_f32(
+; CHECK-NEXT: [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[COS]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %x.fabs.fneg = fsub float -0.0, %x.fabs
+ %cos = call float @llvm.cos.f32(float %x.fabs.fneg)
+ ret float %cos
+}
+
+define <2 x float> @fabs_fneg_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @fabs_fneg_v2f32(
+; CHECK-NEXT: [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: ret <2 x float> [[COS]]
+;
+ %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
+ %x.fabs.fneg = fsub <2 x float> <float -0.0, float -0.0>, %x.fabs
+ %cos = call <2 x float> @llvm.cos.v2f32(<2 x float> %x.fabs.fneg)
+ ret <2 x float> %cos
+}
+
+; Negate is canonicalized after sin.
+
+declare <2 x float> @llvm.sin.v2f32(<2 x float>)
+
+define <2 x float> @fneg_sin(<2 x float> %x){
+; CHECK-LABEL: @fneg_sin(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negx = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %negx)
+ ret <2 x float> %r
+}
+
+; FMF are not required, but they should propagate.
+
+define <2 x float> @fneg_sin_fmf(<2 x float> %x){
+; CHECK-LABEL: @fneg_sin_fmf(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan arcp afn <2 x float> @llvm.sin.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub nnan arcp afn <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negx = fsub fast <2 x float> <float -0.0, float -0.0>, %x
+ %r = call nnan arcp afn <2 x float> @llvm.sin.v2f32(<2 x float> %negx)
+ ret <2 x float> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/crash.ll b/llvm/test/Transforms/InstCombine/crash.ll
new file mode 100644
index 00000000000..fbb9675c0f1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/crash.ll
@@ -0,0 +1,398 @@
+; RUN: opt < %s -instcombine -S
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128:n8:16:32"
+target triple = "i386-apple-darwin10.0"
+
+define i32 @test0(i8 %tmp2) ssp {
+entry:
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp8 = lshr i32 %tmp3, 6
+ %tmp9 = lshr i32 %tmp3, 7
+ %tmp10 = xor i32 %tmp9, 67108858
+ %tmp11 = xor i32 %tmp10, %tmp8
+ %tmp12 = xor i32 %tmp11, 0
+ ret i32 %tmp12
+}
+
+; PR4905
+define <2 x i64> @test1(<2 x i64> %x, <2 x i64> %y) nounwind {
+entry:
+ %conv.i94 = bitcast <2 x i64> %y to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %sub.i97 = sub <4 x i32> %conv.i94, undef ; <<4 x i32>> [#uses=1]
+ %conv3.i98 = bitcast <4 x i32> %sub.i97 to <2 x i64> ; <<2 x i64>> [#uses=2]
+ %conv2.i86 = bitcast <2 x i64> %conv3.i98 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %cmp.i87 = icmp sgt <4 x i32> undef, %conv2.i86 ; <<4 x i1>> [#uses=1]
+ %sext.i88 = sext <4 x i1> %cmp.i87 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %conv3.i89 = bitcast <4 x i32> %sext.i88 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %and.i = and <2 x i64> %conv3.i89, %conv3.i98 ; <<2 x i64>> [#uses=1]
+ %or.i = or <2 x i64> zeroinitializer, %and.i ; <<2 x i64>> [#uses=1]
+ %conv2.i43 = bitcast <2 x i64> %or.i to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %sub.i = sub <4 x i32> zeroinitializer, %conv2.i43 ; <<4 x i32>> [#uses=1]
+ %conv3.i44 = bitcast <4 x i32> %sub.i to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %conv3.i44
+}
+
+
+; PR4908
+define void @test2(<1 x i16>* nocapture %b, i32* nocapture %c) nounwind ssp {
+entry:
+ %arrayidx = getelementptr inbounds <1 x i16>, <1 x i16>* %b, i64 undef ; <<1 x i16>*>
+ %tmp2 = load <1 x i16>, <1 x i16>* %arrayidx ; <<1 x i16>> [#uses=1]
+ %tmp6 = bitcast <1 x i16> %tmp2 to i16 ; <i16> [#uses=1]
+ %tmp7 = zext i16 %tmp6 to i32 ; <i32> [#uses=1]
+ %ins = or i32 0, %tmp7 ; <i32> [#uses=1]
+ %arrayidx20 = getelementptr inbounds i32, i32* %c, i64 undef ; <i32*> [#uses=1]
+ store i32 %ins, i32* %arrayidx20
+ ret void
+}
+
+; PR5262
+@tmp2 = global i64 0 ; <i64*> [#uses=1]
+
+declare void @use(i64) nounwind
+
+define void @foo(i1) nounwind align 2 {
+; <label>:1
+ br i1 %0, label %2, label %3
+
+; <label>:2 ; preds = %1
+ br label %3
+
+; <label>:3 ; preds = %2, %1
+ %4 = phi i8 [ 1, %2 ], [ 0, %1 ] ; <i8> [#uses=1]
+ %5 = icmp eq i8 %4, 0 ; <i1> [#uses=1]
+ %6 = load i64, i64* @tmp2, align 8 ; <i64> [#uses=1]
+ %7 = select i1 %5, i64 0, i64 %6 ; <i64> [#uses=1]
+ br label %8
+
+; <label>:8 ; preds = %3
+ call void @use(i64 %7)
+ ret void
+}
+
+%t0 = type { i32, i32 }
+%t1 = type { i32, i32, i32, i32, i32* }
+
+declare %t0* @bar2(i64)
+
+define void @bar3(i1, i1) nounwind align 2 {
+; <label>:2
+ br i1 %1, label %10, label %3
+
+; <label>:3 ; preds = %2
+ %4 = getelementptr inbounds %t0, %t0* null, i64 0, i32 1 ; <i32*> [#uses=0]
+ %5 = getelementptr inbounds %t1, %t1* null, i64 0, i32 4 ; <i32**> [#uses=1]
+ %6 = load i32*, i32** %5, align 8 ; <i32*> [#uses=1]
+ %7 = icmp ne i32* %6, null ; <i1> [#uses=1]
+ %8 = zext i1 %7 to i32 ; <i32> [#uses=1]
+ %9 = add i32 %8, 0 ; <i32> [#uses=1]
+ br label %10
+
+; <label>:10 ; preds = %3, %2
+ %11 = phi i32 [ %9, %3 ], [ 0, %2 ] ; <i32> [#uses=1]
+ br i1 %1, label %12, label %13
+
+; <label>:12 ; preds = %10
+ br label %13
+
+; <label>:13 ; preds = %12, %10
+ %14 = zext i32 %11 to i64 ; <i64> [#uses=1]
+ %15 = tail call %t0* @bar2(i64 %14) nounwind ; <%0*> [#uses=0]
+ ret void
+}
+
+
+
+
+; PR5262
+; Make sure the PHI node gets put in a place where all of its operands dominate
+; it.
+define i64 @test4(i1 %c, i64* %P) nounwind align 2 {
+BB0:
+ br i1 %c, label %BB1, label %BB2
+
+BB1:
+ br label %BB2
+
+BB2:
+ %v5_ = phi i1 [ true, %BB0], [false, %BB1]
+ %v6 = load i64, i64* %P
+ br label %l8
+
+l8:
+ br label %l10
+
+l10:
+ %v11 = select i1 %v5_, i64 0, i64 %v6
+ ret i64 %v11
+}
+
+; PR5471
+define i32 @test5a() {
+ ret i32 0
+}
+
+define void @test5() personality i32 (...)* @__gxx_personality_v0 {
+ store i1 true, i1* undef
+ %r = invoke i32 @test5a() to label %exit unwind label %unwind
+unwind:
+ %exn = landingpad {i8*, i32}
+ cleanup
+ br label %exit
+exit:
+ ret void
+}
+
+
+; PR5673
+
+@test6g = external global i32*
+
+define arm_aapcs_vfpcc i32 @test6(i32 %argc, i8** %argv) nounwind {
+entry:
+ store i32* getelementptr (i32, i32* bitcast (i32 (i32, i8**)* @test6 to i32*), i32 -2048), i32** @test6g, align 4
+ unreachable
+}
+
+
+; PR5827
+
+%class.RuleBasedBreakIterator = type { i64 ()* }
+%class.UStack = type { i8** }
+
+define i32 @_ZN22RuleBasedBreakIterator15checkDictionaryEi(%class.RuleBasedBreakIterator* %this, i32 %x) align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %breaks = alloca %class.UStack, align 4 ; <%class.UStack*> [#uses=3]
+ call void @_ZN6UStackC1Ei(%class.UStack* %breaks, i32 0)
+ %tobool = icmp ne i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %cond.end, label %cond.false
+
+terminate.handler: ; preds = %ehcleanup
+ %exc = landingpad { i8*, i32 }
+ cleanup
+ call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+
+ehcleanup: ; preds = %cond.false
+ %exc1 = landingpad { i8*, i32 }
+ catch i8* null
+ invoke void @_ZN6UStackD1Ev(%class.UStack* %breaks)
+ to label %cont unwind label %terminate.handler
+
+cont: ; preds = %ehcleanup
+ resume { i8*, i32 } %exc1
+
+cond.false: ; preds = %entry
+ %tmp4 = getelementptr inbounds %class.RuleBasedBreakIterator, %class.RuleBasedBreakIterator* %this, i32 0, i32 0 ; <i64 ()**> [#uses=1]
+ %tmp5 = load i64 ()*, i64 ()** %tmp4 ; <i64 ()*> [#uses=1]
+ %call = invoke i64 %tmp5()
+ to label %cond.end unwind label %ehcleanup ; <i64> [#uses=1]
+
+cond.end: ; preds = %cond.false, %entry
+ %cond = phi i64 [ 0, %entry ], [ %call, %cond.false ] ; <i64> [#uses=1]
+ %conv = trunc i64 %cond to i32 ; <i32> [#uses=1]
+ call void @_ZN6UStackD1Ev(%class.UStack* %breaks)
+ ret i32 %conv
+}
+
+declare void @_ZN6UStackC1Ei(%class.UStack*, i32)
+
+declare void @_ZN6UStackD1Ev(%class.UStack*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZSt9terminatev()
+
+declare void @_Unwind_Resume_or_Rethrow(i8*)
+
+
+
+; rdar://7590304
+define i8* @test10(i8* %self, i8* %tmp3) personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ store i1 true, i1* undef
+ store i1 true, i1* undef
+ invoke void @test10a()
+ to label %invoke.cont unwind label %try.handler ; <i8*> [#uses=0]
+
+invoke.cont: ; preds = %entry
+ unreachable
+
+try.handler: ; preds = %entry
+ %exn = landingpad {i8*, i32}
+ catch i8* null
+ ret i8* %self
+}
+
+define void @test10a() {
+ ret void
+}
+
+
+; PR6193
+define i32 @test11(i32 %aMaskWidth, i8 %aStride) nounwind {
+entry:
+ %conv41 = sext i8 %aStride to i32
+ %neg = xor i32 %conv41, -1
+ %and42 = and i32 %aMaskWidth, %neg
+ %and47 = and i32 130, %conv41
+ %or = or i32 %and42, %and47
+ ret i32 %or
+}
+
+; PR6503
+define void @test12(i32* %A) nounwind {
+entry:
+ %tmp1 = load i32, i32* %A
+ %cmp = icmp ugt i32 1, %tmp1 ; <i1> [#uses=1]
+ %conv = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %A
+ %cmp3 = icmp ne i32 %tmp2, 0 ; <i1> [#uses=1]
+ %conv4 = zext i1 %cmp3 to i32 ; <i32> [#uses=1]
+ %or = or i32 %conv, %conv4 ; <i32> [#uses=1]
+ %cmp5 = icmp ugt i32 undef, %or ; <i1> [#uses=1]
+ %conv6 = zext i1 %cmp5 to i32 ; <i32> [#uses=0]
+ ret void
+}
+
+%s1 = type { %s2, %s2, [6 x %s2], i32, i32, i32, [1 x i32], [0 x i8] }
+%s2 = type { i64 }
+define void @test13() nounwind ssp {
+entry:
+ %0 = getelementptr inbounds %s1, %s1* null, i64 0, i32 2, i64 0, i32 0
+ %1 = bitcast i64* %0 to i32*
+ %2 = getelementptr inbounds %s1, %s1* null, i64 0, i32 2, i64 1, i32 0
+ %.pre = load i32, i32* %1, align 8
+ %3 = lshr i32 %.pre, 19
+ %brmerge = or i1 undef, undef
+ %4 = and i32 %3, 3
+ %5 = add nsw i32 %4, 1
+ %6 = shl i32 %5, 19
+ %7 = add i32 %6, 1572864
+ %8 = and i32 %7, 1572864
+ %9 = load i64, i64* %2, align 8
+ %trunc156 = trunc i64 %9 to i32
+ %10 = and i32 %trunc156, -1537
+ %11 = and i32 %10, -6145
+ %12 = or i32 %11, 2048
+ %13 = and i32 %12, -24577
+ %14 = or i32 %13, 16384
+ %15 = or i32 %14, 98304
+ store i32 %15, i32* undef, align 8
+ %16 = and i32 %15, -1572865
+ %17 = or i32 %16, %8
+ store i32 %17, i32* undef, align 8
+ %18 = and i32 %17, -449
+ %19 = or i32 %18, 64
+ store i32 %19, i32* undef, align 8
+ unreachable
+}
+
+
+; PR8807
+declare i32 @test14f(i8* (i8*)*) nounwind
+
+define void @test14() nounwind readnone {
+entry:
+ %tmp = bitcast i32 (i8* (i8*)*)* @test14f to i32 (i32*)*
+ %call10 = call i32 %tmp(i32* byval undef)
+ ret void
+}
+
+
+; PR8896
+@g_54 = external global [7 x i16]
+
+define void @test15(i32* %p_92) nounwind {
+entry:
+%0 = load i32, i32* %p_92, align 4
+%1 = icmp ne i32 %0, 0
+%2 = zext i1 %1 to i32
+%3 = call i32 @func_14() nounwind
+%4 = trunc i32 %3 to i16
+%5 = sext i16 %4 to i32
+%6 = trunc i32 %5 to i16
+br i1 undef, label %"3", label %"5"
+
+"3": ; preds = %entry
+%7 = sext i16 %6 to i32
+%8 = ashr i32 %7, -1649554541
+%9 = trunc i32 %8 to i16
+br label %"5"
+
+"5": ; preds = %"3", %entry
+%10 = phi i16 [ %9, %"3" ], [ %6, %entry ]
+%11 = sext i16 %10 to i32
+%12 = xor i32 %2, %11
+%13 = sext i32 %12 to i64
+%14 = icmp ne i64 %13, 0
+br i1 %14, label %return, label %"7"
+
+"7": ; preds = %"5"
+ret void
+
+return: ; preds = %"5"
+ret void
+}
+
+declare i32 @func_14()
+
+
+define double @test16(i32 %a) nounwind {
+ %cmp = icmp slt i32 %a, 2
+ %select = select i1 %cmp, double 2.000000e+00, double 3.141592e+00
+ ret double %select
+}
+
+
+; PR8983
+%struct.basic_ios = type { i8 }
+
+define %struct.basic_ios *@test17() ssp {
+entry:
+ %add.ptr.i = getelementptr i8, i8* null, i64 undef
+ %0 = bitcast i8* %add.ptr.i to %struct.basic_ios*
+ ret %struct.basic_ios* %0
+}
+
+; PR9013
+define void @test18() nounwind ssp {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %l_197.0 = phi i32 [ 0, %entry ], [ %sub.i, %for.inc ]
+ br label %for.inc
+
+for.inc: ; preds = %for.cond
+ %conv = and i32 %l_197.0, 255
+ %sub.i = add nsw i32 %conv, -1
+ br label %for.cond
+
+return: ; No predecessors!
+ ret void
+}
+
+; PR11275
+declare void @test18b() noreturn
+declare void @test18foo(double**)
+declare void @test18a() noreturn
+define fastcc void @test18x(i8* %t0, i1 %b) uwtable align 2 personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ br i1 %b, label %e1, label %e2
+e1:
+ %t2 = bitcast i8* %t0 to double**
+ invoke void @test18b() noreturn
+ to label %u unwind label %lpad
+e2:
+ %t4 = bitcast i8* %t0 to double**
+ invoke void @test18a() noreturn
+ to label %u unwind label %lpad
+lpad:
+ %t5 = phi double** [ %t2, %e1 ], [ %t4, %e2 ]
+ %lpad.nonloopexit262 = landingpad { i8*, i32 }
+ cleanup
+ call void @test18foo(double** %t5)
+ unreachable
+u:
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-bitreverse.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-bitreverse.ll
new file mode 100644
index 00000000000..fb745959829
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-bitreverse.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @ctlz_true_bitreverse(i32 %x) {
+; CHECK-LABEL: @ctlz_true_bitreverse(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !0
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %a = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %b = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+ ret i32 %b
+}
+
+define <2 x i64> @ctlz_true_bitreverse_vec(<2 x i64> %x) {
+; CHECK-LABEL: @ctlz_true_bitreverse_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 true)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %a = tail call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %x)
+ %b = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
+ ret <2 x i64> %b
+}
+
+define i32 @ctlz_false_bitreverse(i32 %x) {
+; CHECK-LABEL: @ctlz_false_bitreverse(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %a = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %b = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %b
+}
+
+define i32 @cttz_true_bitreverse(i32 %x) {
+; CHECK-LABEL: @cttz_true_bitreverse(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), !range !0
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %a = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %b = tail call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ ret i32 %b
+}
+
+define <2 x i64> @cttz_true_bitreverse_vec(<2 x i64> %x) {
+; CHECK-LABEL: @cttz_true_bitreverse_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[X:%.*]], i1 true)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %a = tail call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %x)
+ %b = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+ ret <2 x i64> %b
+}
+
+define i32 @cttz_false_bitreverse(i32 %x) {
+; CHECK-LABEL: @cttz_false_bitreverse(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %a = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %b = tail call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %b
+}
+
+declare i32 @llvm.bitreverse.i32(i32)
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
diff --git a/llvm/test/Transforms/InstCombine/ctpop-bswap-bitreverse.ll b/llvm/test/Transforms/InstCombine/ctpop-bswap-bitreverse.ll
new file mode 100644
index 00000000000..00e2aa88666
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctpop-bswap-bitreverse.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @ctpop_bitreverse(i32 %x) {
+; CHECK-LABEL: @ctpop_bitreverse(
+; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %b = tail call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %b
+}
+
+define <2 x i64> @ctpop_bitreverse_vec(<2 x i64> %x) {
+; CHECK-LABEL: @ctpop_bitreverse_vec(
+; CHECK-NEXT: [[B:%.*]] = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = tail call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %x)
+ %b = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+ ret <2 x i64> %b
+}
+
+define i32 @ctpop_bswap(i32 %x) {
+; CHECK-LABEL: @ctpop_bswap(
+; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = tail call i32 @llvm.bswap.i32(i32 %x)
+ %b = tail call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %b
+}
+
+define <2 x i64> @ctpop_bswap_vec(<2 x i64> %x) {
+; CHECK-LABEL: @ctpop_bswap_vec(
+; CHECK-NEXT: [[B:%.*]] = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[X:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x)
+ %b = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+ ret <2 x i64> %b
+}
+
+declare i32 @llvm.bitreverse.i32(i32)
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
+declare i32 @llvm.bswap.i32(i32)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+declare i32 @llvm.ctpop.i32(i32)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
new file mode 100644
index 00000000000..33b95b02dd2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32)
+declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+declare void @llvm.assume(i1)
+
+define i1 @test1(i32 %arg) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 false
+;
+ %and = and i32 %arg, 15
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 9
+ ret i1 %res
+}
+
+define i1 @test2(i32 %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 false
+;
+ %and = and i32 %arg, 1
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+define i1 @test3(i32 %arg) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[ASSUME:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]])
+; CHECK-NEXT: ret i1 false
+;
+ ;; Use an assume to make all the bits known without triggering constant
+ ;; folding. This is trying to hit a corner case where we have to avoid
+ ;; taking the log of 0.
+ %assume = icmp eq i32 %arg, 0
+ call void @llvm.assume(i1 %assume)
+ %cnt = call i32 @llvm.ctpop.i32(i32 %arg)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+; Negative test for when we know nothing
+define i1 @test4(i8 %arg) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[$RANGE:[0-9]+]]
+; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %cnt = call i8 @llvm.ctpop.i8(i8 %arg)
+ %res = icmp eq i8 %cnt, 2
+ ret i1 %res
+}
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+define i1 @test5(i32 %arg) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i1 false
+;
+ %and = and i32 %arg, 3
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 3
+ ret i1 %res
+}
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %and = and <2 x i32> %arg, <i32 3, i32 3>
+ %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+ ret i1 %cnt
+}
+
+; CHECK: ![[$RANGE]] = !{i8 0, i8 9}
diff --git a/llvm/test/Transforms/InstCombine/dce-iterate.ll b/llvm/test/Transforms/InstCombine/dce-iterate.ll
new file mode 100644
index 00000000000..1dd45225410
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/dce-iterate.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | grep "ret double .sy"
+
+define internal double @ScaleObjectAdd(double %sx, double %sy, double %sz) nounwind {
+entry:
+ %sx34 = bitcast double %sx to i64 ; <i64> [#uses=1]
+ %sx3435 = zext i64 %sx34 to i960 ; <i960> [#uses=1]
+ %sy22 = bitcast double %sy to i64 ; <i64> [#uses=1]
+ %sy2223 = zext i64 %sy22 to i960 ; <i960> [#uses=1]
+ %sy222324 = shl i960 %sy2223, 320 ; <i960> [#uses=1]
+ %sy222324.ins = or i960 %sx3435, %sy222324 ; <i960> [#uses=1]
+ %sz10 = bitcast double %sz to i64 ; <i64> [#uses=1]
+ %sz1011 = zext i64 %sz10 to i960 ; <i960> [#uses=1]
+ %sz101112 = shl i960 %sz1011, 640 ; <i960> [#uses=1]
+ %sz101112.ins = or i960 %sy222324.ins, %sz101112
+
+ %a = trunc i960 %sz101112.ins to i64 ; <i64> [#uses=1]
+ %b = bitcast i64 %a to double ; <double> [#uses=1]
+ %c = lshr i960 %sz101112.ins, 320 ; <i960> [#uses=1]
+ %d = trunc i960 %c to i64 ; <i64> [#uses=1]
+ %e = bitcast i64 %d to double ; <double> [#uses=1]
+ %f = fadd double %b, %e
+
+ ret double %e
+}
diff --git a/llvm/test/Transforms/InstCombine/deadcode.ll b/llvm/test/Transforms/InstCombine/deadcode.ll
new file mode 100644
index 00000000000..c5fa58babdb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/deadcode.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -instcombine -S | grep "ret i32 %A"
+; RUN: opt < %s -die -S | not grep call.*llvm
+
+define i32 @test(i32 %A) {
+ %X = or i1 false, false
+ br i1 %X, label %T, label %C
+
+T: ; preds = %0
+ %B = add i32 %A, 1
+ br label %C
+
+C: ; preds = %T, %0
+ %C.upgrd.1 = phi i32 [ %B, %T ], [ %A, %0 ]
+ ret i32 %C.upgrd.1
+}
+
+define i32* @test2(i32 %width) {
+ %tmp = call i8* @llvm.stacksave( )
+ %tmp14 = alloca i32, i32 %width
+ ret i32* %tmp14
+}
+
+declare i8* @llvm.stacksave()
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
+
+define void @test3() {
+ call void @llvm.lifetime.start.p0i8(i64 -1, i8* undef)
+ call void @llvm.lifetime.end.p0i8(i64 -1, i8* undef)
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/debug-line.ll b/llvm/test/Transforms/InstCombine/debug-line.ll
new file mode 100644
index 00000000000..61ff5da7e06
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debug-line.ll
@@ -0,0 +1,26 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+
+@.str = private constant [3 x i8] c"%c\00"
+
+define void @foo() nounwind ssp !dbg !0 {
+;CHECK: call i32 @putchar{{.+}} !dbg
+ %1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
+ ret void, !dbg !7
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
+
+!0 = distinct !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !2, file: !8, scope: !1, type: !3)
+!1 = !DIFile(filename: "m.c", directory: "/private/tmp")
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: FullDebug, file: !8, enums: !{}, retainedTypes: !{})
+!3 = !DISubroutineType(types: !4)
+!4 = !{null}
+!5 = !DILocation(line: 5, column: 2, scope: !6)
+!6 = distinct !DILexicalBlock(line: 4, column: 12, file: !8, scope: !0)
+!7 = !DILocation(line: 6, column: 1, scope: !6)
+!8 = !DIFile(filename: "m.c", directory: "/private/tmp")
+!10 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll
new file mode 100644
index 00000000000..200ea26cdaf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll
@@ -0,0 +1,141 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+; Verify that the eliminated instructions (bitcast, gep, load) are salvaged into
+; a DIExpression.
+;
+; Originally created from the following C source and then heavily isolated/reduced.
+;
+; struct entry {
+; struct entry *next;
+; };
+; void scan(struct entry *queue, struct entry *end)
+; {
+; struct entry *entry;
+; for (entry = (struct entry *)((char *)(queue->next) - 8);
+; &entry->next == end;
+; entry = (struct entry *)((char *)(entry->next) - 8)) {
+; }
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "test.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+%struct.entry = type { %struct.entry* }
+
+; This salvage can't currently occur safely (PR40628); however, if/when that's
+; ever fixed, this is definitely a piece of test coverage that should
+; be maintained.
+define void @salvage_load(%struct.entry** %queue) local_unnamed_addr #0 !dbg !14 {
+entry:
+ %im_not_dead = alloca %struct.entry*
+ %0 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
+ %1 = load %struct.entry*, %struct.entry** %queue, align 8, !dbg !19
+ call void @llvm.dbg.value(metadata %struct.entry* %1, metadata !18, metadata !20), !dbg !19
+; CHECK: define void @salvage_load
+; CHECK-NEXT: entry:
+; CHECK-NOT: dbg.value
+ store %struct.entry* %1, %struct.entry** %im_not_dead, align 8
+ ret void, !dbg !21
+}
+
+define void @salvage_bitcast(%struct.entry* %queue) local_unnamed_addr #0 !dbg !22 {
+entry:
+ %im_not_dead = alloca i8*
+ %0 = bitcast %struct.entry* %queue to i8*, !dbg !23
+ %1 = bitcast %struct.entry* %queue to i8*, !dbg !23
+ call void @llvm.dbg.value(metadata i8* %1, metadata !24, metadata !20), !dbg !23
+; CHECK: define void @salvage_bitcast
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue,
+; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 0))
+ store i8* %1, i8** %im_not_dead, align 8
+ ret void, !dbg !23
+}
+
+define void @salvage_gep0(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !25 {
+entry:
+ %im_not_dead = alloca %struct.entry**
+ %0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !26
+ %1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !26
+ call void @llvm.dbg.value(metadata %struct.entry** %1, metadata !27, metadata !20), !dbg !26
+; CHECK: define void @salvage_gep0
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue,
+; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_plus_uconst, 0, DW_OP_stack_value))
+ store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
+ ret void, !dbg !26
+}
+
+define void @salvage_gep1(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !28 {
+entry:
+ %im_not_dead = alloca %struct.entry**
+ %0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !29
+ %1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !29
+ call void @llvm.dbg.value(metadata %struct.entry** %1, metadata !30, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !29
+; CHECK: define void @salvage_gep1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue,
+; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 32))
+ store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
+ ret void, !dbg !29
+}
+
+define void @salvage_gep2(%struct.entry* %queue, %struct.entry* %end) local_unnamed_addr #0 !dbg !31 {
+entry:
+ %im_not_dead = alloca %struct.entry**
+ %0 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !32
+ %1 = getelementptr inbounds %struct.entry, %struct.entry* %queue, i32 -1, i32 0, !dbg !32
+ call void @llvm.dbg.value(metadata %struct.entry** %1, metadata !33, metadata !DIExpression(DW_OP_stack_value)), !dbg !32
+; CHECK: define void @salvage_gep2
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.entry* %queue,
+; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value))
+ store %struct.entry** %1, %struct.entry*** %im_not_dead, align 8
+ ret void, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "entry", file: !1, line: 1, size: 64, elements: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: !5, file: !1, line: 2, baseType: !4, size: 64)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
+!9 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"PIC Level", i32 2}
+!13 = !{!"clang version 5.0.0 (trunk 297628) (llvm/trunk 297643)"}
+!14 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null, !4, !4}
+!17 = !{!18}
+!18 = !DILocalVariable(name: "entry", scope: !14, file: !1, line: 6, type: !4)
+!19 = !DILocation(line: 6, column: 17, scope: !14)
+!20 = !DIExpression(DW_OP_plus_uconst, 0)
+!21 = !DILocation(line: 11, column: 1, scope: !14)
+!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!23 = !DILocation(line: 6, column: 17, scope: !22)
+!24 = !DILocalVariable(name: "entry", scope: !22, file: !1, line: 6, type: !4)
+!25 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!26 = !DILocation(line: 6, column: 17, scope: !25)
+!27 = !DILocalVariable(name: "entry", scope: !25, file: !1, line: 6, type: !4)
+!28 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!29 = !DILocation(line: 6, column: 17, scope: !28)
+!30 = !DILocalVariable(name: "entry", scope: !28, file: !1, line: 6, type: !4)
+!31 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17)
+!32 = !DILocation(line: 6, column: 17, scope: !31)
+!33 = !DILocalVariable(name: "entry", scope: !31, file: !1, line: 6, type: !4)
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll
new file mode 100644
index 00000000000..82d362e5239
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll
@@ -0,0 +1,70 @@
+; RUN: opt -instcombine -S %s -o - | FileCheck %s
+
+; In this example, the cast from i8* to i32* becomes trivially dead. We should
+; salvage its debug info.
+
+; C source:
+; void use_as_void(void *);
+; void f(void *p) {
+; int *q = (int *)p;
+; use_as_void(q);
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "t.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.25508"
+
+; Function Attrs: nounwind uwtable
+define void @f(i8* %p) !dbg !11 {
+entry:
+ call void @llvm.dbg.value(metadata i8* %p, metadata !16, metadata !DIExpression()), !dbg !18
+ %0 = bitcast i8* %p to i32*, !dbg !19
+ call void @llvm.dbg.value(metadata i32* %0, metadata !17, metadata !DIExpression()), !dbg !20
+ %1 = bitcast i32* %0 to i8*, !dbg !21
+ call void @use_as_void(i8* %1), !dbg !22
+ ret void, !dbg !23
+}
+
+; CHECK-LABEL: define void @f(i8* %p)
+; CHECK: call void @llvm.dbg.value(metadata i8* %p, metadata ![[P_VAR:[0-9]+]], metadata !DIExpression())
+; CHECK-NOT: bitcast
+; CHECK: call void @llvm.dbg.value(metadata i8* %p, metadata ![[Q_VAR:[0-9]+]], metadata !DIExpression())
+; CHECK-NOT: bitcast
+; CHECK: ret void
+
+; CHECK: ![[P_VAR]] = !DILocalVariable(name: "p", {{.*}})
+; CHECK: ![[Q_VAR]] = !DILocalVariable(name: "q", {{.*}})
+
+declare void @use_as_void(i8*)
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "t.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild", checksumkind: CSK_MD5, checksum: "56c40617ada23a8cccbd9a16bcec57af")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{i32 2, !"CodeView", i32 1}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 2}
+!9 = !{i32 7, !"PIC Level", i32 2}
+!10 = !{!"clang version 6.0.0 "}
+!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !12, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !15)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !14}
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!15 = !{!16, !17}
+!16 = !DILocalVariable(name: "p", arg: 1, scope: !11, file: !1, line: 2, type: !14)
+!17 = !DILocalVariable(name: "q", scope: !11, file: !1, line: 3, type: !4)
+!18 = !DILocation(line: 2, column: 14, scope: !11)
+!19 = !DILocation(line: 3, column: 12, scope: !11)
+!20 = !DILocation(line: 3, column: 8, scope: !11)
+!21 = !DILocation(line: 4, column: 15, scope: !11)
+!22 = !DILocation(line: 4, column: 3, scope: !11)
+!23 = !DILocation(line: 5, column: 1, scope: !11)
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll
new file mode 100644
index 00000000000..1e6277748fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll
@@ -0,0 +1,78 @@
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+; Test sinking of dbg.values when instcombine sinks associated instructions.
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+; This GEP is sunk, but can be folded into a DIExpression. Check that it
+; gets folded. The dbg.value should be duplicated in the block it's sunk
+; into, to maximise liveness.
+;
+; CHECK-LABEL: define i32 @foo(i32*
+; CHECK: call void @llvm.dbg.value(metadata i32* %a, metadata !{{[0-9]+}},
+; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value))
+; CHECK-NEXT: br label %sink1
+
+define i32 @foo(i32 *%a) !dbg !7 {
+entry:
+ %gep = getelementptr i32, i32 *%a, i32 1
+ call void @llvm.dbg.value(metadata i32 *%gep, metadata !16, metadata !12), !dbg !15
+ br label %sink1
+
+sink1:
+; CHECK-LABEL: sink1:
+; CHECK: call void @llvm.dbg.value(metadata i32* %gep,
+; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression())
+; CHECK-NEXT: load
+ %0 = load i32, i32* %gep, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+}
+
+; In this example the GEP cannot (yet) be salvaged. Check that not only is the
+; dbg.value sunk, but an undef dbg.value is left to terminate any earlier
+; value range.
+
+; CHECK-LABEL: define i32 @bar(
+; CHECK: call void @llvm.dbg.value(metadata i32* undef,
+; CHECK-NEXT: br label %sink2
+
+define i32 @bar(i32 *%a, i32 %b) !dbg !70 {
+entry:
+ %gep = getelementptr i32, i32 *%a, i32 %b
+ call void @llvm.dbg.value(metadata i32* %gep, metadata !73, metadata !12), !dbg !74
+ br label %sink2
+
+sink2:
+; CHECK-LABEL: sink2:
+; CHECK: call void @llvm.dbg.value(metadata i32* %gep,
+; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression())
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+ %0 = load i32, i32* %gep
+ ret i32 %0
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "a.c", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"PIC Level", i32 2}
+!6 = !{!"clang"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "j", scope: !7, file: !1, line: 2, type: !10)
+!12 = !DIExpression()
+!15 = !DILocation(line: 5, column: 3, scope: !7)
+!16 = !DILocalVariable(name: "h", scope: !7, file: !1, line: 4, type: !10)
+!70 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !71, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!71 = !DISubroutineType(types: !72)
+!72 = !{!10, !10, !10}
+!73 = !DILocalVariable(name: "k", scope: !70, file: !1, line: 2, type: !10)
+!74 = !DILocation(line: 5, column: 3, scope: !70)
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-skip.ll b/llvm/test/Transforms/InstCombine/debuginfo-skip.ll
new file mode 100644
index 00000000000..2fe0c77413b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo-skip.ll
@@ -0,0 +1,45 @@
+; RUN: opt -instcombine-lower-dbg-declare=0 < %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine-lower-dbg-declare=1 < %s -instcombine -S | FileCheck %s
+
+define i32 @foo(i32 %j) #0 !dbg !7 {
+entry:
+ %j.addr = alloca i32, align 4
+ store i32 %j, i32* %j.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %j.addr, metadata !11, metadata !12), !dbg !13
+ call void @llvm.dbg.value(metadata i32 10, metadata !16, metadata !12), !dbg !15
+ %0 = load i32, i32* %j.addr, align 4, !dbg !14
+ ret i32 %0, !dbg !15
+}
+
+; Instcombine can remove the alloca and forward the stored value to the load,
+; but it should convert the dbg.declare to a dbg.value.
+; CHECK-LABEL: define i32 @foo(i32 %j)
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.dbg.value(metadata i32 %j, {{.*}})
+; CHECK: call void @llvm.dbg.value(metadata i32 10, {{.*}})
+; CHECK: ret i32 %j
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "a.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"PIC Level", i32 2}
+!6 = !{!"clang version 5.0.0 (trunk 302918) (llvm/trunk 302925)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "j", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 2, column: 13, scope: !7)
+!14 = !DILocation(line: 5, column: 10, scope: !7)
+!15 = !DILocation(line: 5, column: 3, scope: !7)
+!16 = !DILocalVariable(name: "h", scope: !7, file: !1, line: 4, type: !10)
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-variables.ll b/llvm/test/Transforms/InstCombine/debuginfo-variables.ll
new file mode 100644
index 00000000000..dcb07d5e678
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo-variables.ll
@@ -0,0 +1,122 @@
+; RUN: opt < %s -debugify -instcombine -S | FileCheck %s
+
+declare void @escape32(i32)
+
+define i64 @test_sext_zext(i16 %A) {
+; CHECK-LABEL: @test_sext_zext(
+; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression())
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression())
+ %c1 = zext i16 %A to i32
+ %c2 = sext i32 %c1 to i64
+ ret i64 %c2
+}
+
+define i64 @test_used_sext_zext(i16 %A) {
+; CHECK-LABEL: @test_used_sext_zext(
+; CHECK-NEXT: [[C1:%.*]] = zext i16 %A to i32
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[C1]], {{.*}}, metadata !DIExpression())
+; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression())
+; CHECK-NEXT: call void @escape32(i32 %c1)
+; CHECK-NEXT: ret i64 %c2
+ %c1 = zext i16 %A to i32
+ %c2 = sext i32 %c1 to i64
+ call void @escape32(i32 %c1)
+ ret i64 %c2
+}
+
+define i32 @test_cast_select(i1 %cond) {
+; CHECK-LABEL: @test_cast_select(
+; CHECK-NEXT: [[sel:%.*]] = select i1 %cond, i32 3, i32 5
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[sel]], {{.*}}, metadata !DIExpression())
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[sel]], {{.*}}, metadata !DIExpression())
+; CHECK-NEXT: ret i32 [[sel]]
+ %sel = select i1 %cond, i16 3, i16 5
+ %cast = zext i16 %sel to i32
+ ret i32 %cast
+}
+
+define void @test_or(i64 %A) {
+; CHECK-LABEL: @test_or(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 256, DW_OP_or, DW_OP_stack_value))
+ %1 = or i64 %A, 256
+ ret void
+}
+
+define void @test_xor(i32 %A) {
+; CHECK-LABEL: @test_xor(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 1, DW_OP_xor, DW_OP_stack_value))
+ %1 = xor i32 %A, 1
+ ret void
+}
+
+define void @test_sub_neg(i64 %A) {
+; CHECK-LABEL: @test_sub_neg(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value))
+ %1 = sub i64 %A, -1
+ ret void
+}
+
+define void @test_sub_pos(i64 %A) {
+; CHECK-LABEL: @test_sub_pos(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value))
+ %1 = sub i64 %A, 1
+ ret void
+}
+
+define void @test_shl(i64 %A) {
+; CHECK-LABEL: @test_shl(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shl, DW_OP_stack_value))
+ %1 = shl i64 %A, 7
+ ret void
+}
+
+define void @test_lshr(i64 %A) {
+; CHECK-LABEL: @test_lshr(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shr, DW_OP_stack_value))
+ %1 = lshr i64 %A, 7
+ ret void
+}
+
+define void @test_ashr(i64 %A) {
+; CHECK-LABEL: @test_ashr(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shra, DW_OP_stack_value))
+ %1 = ashr i64 %A, 7
+ ret void
+}
+
+define void @test_mul(i64 %A) {
+; CHECK-LABEL: @test_mul(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_mul, DW_OP_stack_value))
+ %1 = mul i64 %A, 7
+ ret void
+}
+
+define void @test_sdiv(i64 %A) {
+; CHECK-LABEL: @test_sdiv(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_div, DW_OP_stack_value))
+ %1 = sdiv i64 %A, 7
+ ret void
+}
+
+define void @test_srem(i64 %A) {
+; CHECK-LABEL: @test_srem(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_mod, DW_OP_stack_value))
+ %1 = srem i64 %A, 7
+ ret void
+}
+
+define void @test_ptrtoint(i64* %P) {
+; CHECK-LABEL: @test_ptrtoint
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64* %P, {{.*}}, metadata !DIExpression())
+ %1 = ptrtoint i64* %P to i64
+ ret void
+}
+
+define void @test_and(i64 %A) {
+; CHECK-LABEL: @test_and(
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 256, DW_OP_and, DW_OP_stack_value))
+ %1 = and i64 %A, 256
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/debuginfo.ll b/llvm/test/Transforms/InstCombine/debuginfo.ll
new file mode 100644
index 00000000000..05466590a01
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -instcombine -instcombine-lower-dbg-declare=0 -S \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOLOWER
+; RUN: opt < %s -instcombine -instcombine-lower-dbg-declare=1 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux"
+
+%struct.TwoRegs = type { i64, i64 }
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
+
+declare i8* @passthru_callee(i8*, i32, i64, i64)
+
+define i8* @passthru(i8* %a, i32 %b, i64 %c) !dbg !1 {
+entry:
+ %a.addr = alloca i8*, align 8
+ %b.addr = alloca i32, align 4
+ %c.addr = alloca i64, align 8
+ store i8* %a, i8** %a.addr, align 8
+ call void @llvm.dbg.declare(metadata i8** %a.addr, metadata !0, metadata !DIExpression()), !dbg !16
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !7, metadata !DIExpression()), !dbg !18
+ store i64 %c, i64* %c.addr, align 8
+ call void @llvm.dbg.declare(metadata i64* %c.addr, metadata !9, metadata !DIExpression()), !dbg !20
+ %tmp = load i8*, i8** %a.addr, align 8, !dbg !21
+ %tmp1 = load i32, i32* %b.addr, align 4, !dbg !21
+ %tmp2 = load i64, i64* %c.addr, align 8, !dbg !21
+ %tmp3 = load i8*, i8** %a.addr, align 8, !dbg !21
+ %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21
+ %call = call i8* @passthru_callee(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
+ ret i8* %call, !dbg !21
+}
+
+; CHECK-LABEL: define i8* @passthru(i8* %a, i32 %b, i64 %c)
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.value(metadata i8* %a, {{.*}})
+; CHECK-NOT: store
+; CHECK: call void @llvm.dbg.value(metadata i32 %b, {{.*}})
+; CHECK-NOT: store
+; CHECK: call void @llvm.dbg.value(metadata i64 %c, {{.*}})
+; CHECK-NOT: store
+; CHECK: call i8* @passthru_callee(i8* %a, i32 %b, i64 %c, i64 %{{.*}})
+
+declare void @tworegs_callee(i64, i64)
+
+; Lowering dbg.declare in instcombine doesn't handle this case very well.
+
+define void @tworegs(i64 %o.coerce0, i64 %o.coerce1) !dbg !31 {
+entry:
+ %o = alloca %struct.TwoRegs, align 8
+ %0 = bitcast %struct.TwoRegs* %o to { i64, i64 }*
+ %1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 0
+ store i64 %o.coerce0, i64* %1, align 8
+ %2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %0, i32 0, i32 1
+ store i64 %o.coerce1, i64* %2, align 8
+ call void @llvm.dbg.declare(metadata %struct.TwoRegs* %o, metadata !35, metadata !DIExpression()), !dbg !32
+ %3 = bitcast %struct.TwoRegs* %o to { i64, i64 }*, !dbg !33
+ %4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0, !dbg !33
+ %5 = load i64, i64* %4, align 8, !dbg !33
+ %6 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1, !dbg !33
+ %7 = load i64, i64* %6, align 8, !dbg !33
+ call void @tworegs_callee(i64 %5, i64 %7), !dbg !33
+ ret void, !dbg !33
+}
+
+; NOLOWER-LABEL: define void @tworegs(i64 %o.coerce0, i64 %o.coerce1)
+; NOLOWER-NOT: alloca
+; NOLOWER-NOT: store
+; NOLOWER-NOT: call void @llvm.dbg.declare
+; Here we want to find: call void @llvm.dbg.value(metadata i64 %o.coerce0, metadata [[VARIABLE_O]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64))
+; NOLOWER: call void @llvm.dbg.value(metadata i64 undef, {{.*}})
+; NOLOWER-NOT: store
+; Here we want to find: call void @llvm.dbg.value(metadata i64 %o.coerce1, metadata [[VARIABLE_O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64))
+; NOLOWER: call void @llvm.dbg.value(metadata i64 undef, {{.*}})
+; NOLOWER-NOT: store
+; NOLOWER: call void @tworegs_callee(i64 %o.coerce0, i64 %o.coerce1)
+
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
+
+!0 = !DILocalVariable(name: "a", line: 78, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "passthru", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !3, scopeLine: 79, file: !27, scope: !2, type: !4, retainedNodes: !25)
+!2 = !DIFile(filename: "string.h", directory: "Game")
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !29, retainedTypes: !29)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !3, baseType: null)
+!7 = !DILocalVariable(name: "b", line: 78, arg: 2, scope: !1, file: !2, type: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DILocalVariable(name: "c", line: 78, arg: 3, scope: !1, file: !2, type: !12)
+!12 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!16 = !DILocation(line: 78, column: 28, scope: !1)
+!18 = !DILocation(line: 78, column: 40, scope: !1)
+!20 = !DILocation(line: 78, column: 54, scope: !1)
+!21 = !DILocation(line: 80, column: 3, scope: !22)
+!22 = distinct !DILexicalBlock(line: 80, column: 3, file: !27, scope: !23)
+!23 = distinct !DILexicalBlock(line: 79, column: 1, file: !27, scope: !1)
+!25 = !{!0, !7, !9}
+!27 = !DIFile(filename: "string.h", directory: "Game")
+!28 = !DIFile(filename: "bits.c", directory: "Game")
+!29 = !{}
+!30 = !{i32 1, !"Debug Info Version", i32 3}
+
+!31 = distinct !DISubprogram(name: "tworegs", scope: !28, file: !28, line: 4, type: !4, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !3, retainedNodes: !34)
+!32 = !DILocation(line: 4, column: 23, scope: !31)
+!33 = !DILocation(line: 5, column: 3, scope: !31)
+!34 = !{!35}
+!35 = !DILocalVariable(name: "o", arg: 1, scope: !31, file: !28, line: 4, type: !36)
+!36 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TwoRegs", file: !28, line: 1, size: 128, elements: !37)
+!37 = !{!38, !39}
+!38 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !36, file: !28, line: 1, baseType: !12, size: 64)
+!39 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !36, file: !28, line: 1, baseType: !12, size: 64)
+!40 = !DISubroutineType(types: !41)
+!41 = !{!36}
diff --git a/llvm/test/Transforms/InstCombine/debuginfo_add.ll b/llvm/test/Transforms/InstCombine/debuginfo_add.ll
new file mode 100644
index 00000000000..cf9fd923b25
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/debuginfo_add.ll
@@ -0,0 +1,114 @@
+; RUN: opt -instcombine %s -o - -S | FileCheck %s
+; typedef struct v *v_t;
+; struct v {
+; unsigned long long p;
+; };
+;
+; void f(v_t object, unsigned long long *start) {
+; unsigned head_size;
+; unsigned long long orig_start;
+; unsigned long long offset;
+; orig_start = *start;
+; for (offset = orig_start - (unsigned long long)(1 << 12); head_size;
+; offset -= (unsigned long long)(1 << 12), head_size -= (1 << 12))
+; use(offset, (object));
+; }
+source_filename = "test.i"
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios5.0.0"
+
+%struct.vm_object = type { i64 }
+
+; Function Attrs: nounwind ssp
+define void @f(%struct.vm_object* %object, i64* nocapture readonly %start) local_unnamed_addr #0 !dbg !11 {
+entry:
+ tail call void @llvm.dbg.value(metadata %struct.vm_object* %object, metadata !21, metadata !DIExpression()), !dbg !27
+ tail call void @llvm.dbg.value(metadata i64* %start, metadata !22, metadata !DIExpression()), !dbg !28
+ %0 = load i64, i64* %start, align 4, !dbg !29
+ tail call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression()), !dbg !30
+ %offset.08 = add i64 %0, -4096
+ tail call void @llvm.dbg.value(metadata i64 %offset.08, metadata !26, metadata !DIExpression()), !dbg !31
+ tail call void @llvm.dbg.value(metadata i32 undef, metadata !23, metadata !DIExpression()), !dbg !32
+ br i1 undef, label %for.end, label %for.body.lr.ph, !dbg !32
+
+for.body.lr.ph: ; preds = %entry
+ ; The 'load' and the 'add' are sunk to this basic block. So let's verify that the related dbg.values are sunk as well.
+ ; The add is later eliminated, so we verify that the dbg.value is salvaged by using DW_OP_minus.
+ ; CHECK-LABEL: for.body.lr.ph:
+ ; CHECK-NEXT: %0 = load
+ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression()), !dbg !
+ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %0, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg !
+ br label %for.body, !dbg !32
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ ; CHECK-LABEL: for.body:
+ %offset.010 = phi i64 [ %offset.08, %for.body.lr.ph ], [ %offset.0, %for.body ]
+ %head_size.09 = phi i32 [ undef, %for.body.lr.ph ], [ %sub2, %for.body ]
+ tail call void @llvm.dbg.value(metadata i32 %head_size.09, metadata !23, metadata !DIExpression()), !dbg !31
+ %call = tail call i32 bitcast (i32 (...)* @use to i32 (i64, %struct.vm_object*)*)(i64 %offset.010, %struct.vm_object* %object) #3, !dbg !34
+ %sub2 = add i32 %head_size.09, -4096, !dbg !37
+ %offset.0 = add i64 %offset.010, -4096
+ tail call void @llvm.dbg.value(metadata i64 %offset.0, metadata !26, metadata !DIExpression()), !dbg !30
+ ; CHECK: call void @llvm.dbg.value(metadata i64 %offset.010, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg !
+ tail call void @llvm.dbg.value(metadata i32 %sub2, metadata !23, metadata !DIExpression()), !dbg !31
+ %tobool = icmp eq i32 %sub2, 0, !dbg !32
+ br i1 %tobool, label %for.end, label %for.body, !dbg !32, !llvm.loop !38
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !40
+}
+
+declare i32 @use(...) local_unnamed_addr
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+attributes #0 = { nounwind ssp }
+attributes #2 = { nounwind readnone speculatable }
+attributes #3 = { nobuiltin }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5, !6, !7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 (trunk 317434) (llvm/trunk 317437)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "test.i", directory: "/Data/radar/31209283")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!5 = !{i32 2, !"Dwarf Version", i32 2}
+!6 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !{i32 1, !"wchar_size", i32 4}
+!8 = !{i32 1, !"min_enum_size", i32 4}
+!9 = !{i32 7, !"PIC Level", i32 2}
+!10 = !{!"clang version 6.0.0 (trunk 317434) (llvm/trunk 317437)"}
+!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !12, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !14, !19}
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "v_t", file: !1, line: 1, baseType: !15)
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 32)
+!16 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "v", file: !1, line: 2, size: 64, elements: !17)
+!17 = !{!18}
+!18 = !DIDerivedType(tag: DW_TAG_member, name: "p", scope: !16, file: !1, line: 3, baseType: !4, size: 64)
+!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 32)
+!20 = !{!21, !22, !23, !25, !26}
+!21 = !DILocalVariable(name: "object", arg: 1, scope: !11, file: !1, line: 6, type: !14)
+!22 = !DILocalVariable(name: "start", arg: 2, scope: !11, file: !1, line: 6, type: !19)
+!23 = !DILocalVariable(name: "head_size", scope: !11, file: !1, line: 7, type: !24)
+!24 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!25 = !DILocalVariable(name: "orig_start", scope: !11, file: !1, line: 8, type: !4)
+!26 = !DILocalVariable(name: "offset", scope: !11, file: !1, line: 9, type: !4)
+!27 = !DILocation(line: 6, column: 20, scope: !11)
+!28 = !DILocation(line: 6, column: 48, scope: !11)
+!29 = !DILocation(line: 8, column: 22, scope: !11)
+!30 = !DILocation(line: 7, column: 12, scope: !11)
+!31 = !DILocation(line: 10, column: 16, scope: !11)
+!32 = !DILocation(line: 11, column: 5, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 11, column: 5)
+!34 = !DILocation(line: 13, column: 7, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !36, file: !1, line: 12, column: 75)
+!36 = distinct !DILexicalBlock(scope: !33, file: !1, line: 11, column: 5)
+!37 = !DILocation(line: 12, column: 61, scope: !36)
+!38 = distinct !{!38, !32, !39}
+!39 = !DILocation(line: 14, column: 3, scope: !33)
+!40 = !DILocation(line: 15, column: 1, scope: !11)
diff --git a/llvm/test/Transforms/InstCombine/default-alignment.ll b/llvm/test/Transforms/InstCombine/default-alignment.ll
new file mode 100644
index 00000000000..718da213671
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/default-alignment.ll
@@ -0,0 +1,10 @@
+; RUN: opt -verify -instcombine < %s
+%Foo = type <{ i8, x86_fp80 }>
+
+define i8 @t(%Foo* %arg) {
+entry:
+ %0 = getelementptr %Foo, %Foo* %arg, i32 0, i32 0
+ %1 = load i8, i8* %0, align 1
+ ret i8 %1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/demand_shrink_nsw.ll b/llvm/test/Transforms/InstCombine/demand_shrink_nsw.ll
new file mode 100644
index 00000000000..be19fbc4f91
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/demand_shrink_nsw.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -o - -S %s | FileCheck %s
+
+; The constant at %v35 should be shrunk, which requires dropping the nsw flag
+; from %v43.
+
+define i32 @foo(i32 %arg) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[V33:%.*]] = and i32 [[ARG:%.*]], 223
+; CHECK-NEXT: [[V34:%.*]] = xor i32 [[V33]], 29
+; CHECK-NEXT: [[V35:%.*]] = add nuw nsw i32 [[V34]], 1362915575
+; CHECK-NEXT: [[V40:%.*]] = shl nuw nsw i32 [[V34]], 1
+; CHECK-NEXT: [[V41:%.*]] = and i32 [[V40]], 290
+; CHECK-NEXT: [[V42:%.*]] = sub nuw nsw i32 [[V35]], [[V41]]
+; CHECK-NEXT: [[V43:%.*]] = add nuw i32 [[V42]], 1533579450
+; CHECK-NEXT: [[V45:%.*]] = xor i32 [[V43]], 749011377
+; CHECK-NEXT: ret i32 [[V45]]
+;
+ %v33 = and i32 %arg, 223
+ %v34 = xor i32 %v33, 29
+ %v35 = add nuw i32 %v34, 3510399223
+ %v37 = or i32 %v34, 1874836915
+ %v38 = and i32 %v34, 221
+ %v39 = xor i32 %v38, 1874836915
+ %v40 = xor i32 %v37, %v39
+ %v41 = shl nsw nuw i32 %v40, 1
+ %v42 = sub i32 %v35, %v41
+ %v43 = add nsw i32 %v42, 1533579450
+ %v44 = or i32 %v43, -2147483648
+ %v45 = xor i32 %v44, 749011377
+ ret i32 %v45
+}
+
diff --git a/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
new file mode 100644
index 00000000000..c378033eef7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/demorgan-sink-not-into-xor.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38446
+
+; Pattern:
+; ~(x ^ y)
+; Should be transformed into:
+; (~x) ^ y
+; or into
+; x ^ (~y)
+
+; While -reassociate does handle this simple pattern, it does not handle
+; the more complicated motivating pattern.
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+; If the operand is easily-invertible, fold into it.
+declare i1 @gen1()
+
+define i1 @positive_easyinvert(i16 %x, i8 %y) {
+; CHECK-LABEL: @positive_easyinvert(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = icmp slt i16 %x, 0
+ %tmp2 = icmp slt i8 %y, 0
+ %tmp3 = xor i1 %tmp2, %tmp1
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+define i1 @positive_easyinvert0(i8 %y) {
+; CHECK-LABEL: @positive_easyinvert0(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = call i1 @gen1()
+ %tmp2 = icmp slt i8 %y, 0
+ %tmp3 = xor i1 %tmp2, %tmp1
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+define i1 @positive_easyinvert1(i8 %y) {
+; CHECK-LABEL: @positive_easyinvert1(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = call i1 @gen1()
+ %tmp2 = icmp slt i8 %y, 0
+ %tmp3 = xor i1 %tmp1, %tmp2
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+; ============================================================================ ;
+; One-use tests with easily-invertible operand.
+; ============================================================================ ;
+
+declare void @use1(i1)
+
+define i1 @oneuse_easyinvert_0(i8 %y) {
+; CHECK-LABEL: @oneuse_easyinvert_0(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT: call void @use1(i1 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = call i1 @gen1()
+ %tmp2 = icmp slt i8 %y, 0
+ call void @use1(i1 %tmp2)
+ %tmp3 = xor i1 %tmp1, %tmp2
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+define i1 @oneuse_easyinvert_1(i8 %y) {
+; CHECK-LABEL: @oneuse_easyinvert_1(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use1(i1 [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = call i1 @gen1()
+ %tmp2 = icmp slt i8 %y, 0
+ %tmp3 = xor i1 %tmp1, %tmp2
+ call void @use1(i1 %tmp3)
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+define i1 @oneuse_easyinvert_2(i8 %y) {
+; CHECK-LABEL: @oneuse_easyinvert_2(
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @gen1()
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
+; CHECK-NEXT: call void @use1(i1 [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use1(i1 [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = call i1 @gen1()
+ %tmp2 = icmp slt i8 %y, 0
+ call void @use1(i1 %tmp2)
+ %tmp3 = xor i1 %tmp1, %tmp2
+ call void @use1(i1 %tmp3)
+ %tmp4 = xor i1 %tmp3, true
+ ret i1 %tmp4
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; Not easily invertible.
+define i32 @negative(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = xor i32 %x, %y
+ %tmp2 = xor i32 %tmp1, -1
+ ret i32 %tmp2
+}
diff --git a/llvm/test/Transforms/InstCombine/demorgan.ll b/llvm/test/Transforms/InstCombine/demorgan.ll
new file mode 100644
index 00000000000..8c3d3b83046
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/demorgan.ll
@@ -0,0 +1,501 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (~A | ~B) == ~(A & B)
+
+define i43 @demorgan_or_apint1(i43 %A, i43 %B) {
+; CHECK-LABEL: @demorgan_or_apint1(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = and i43 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i43 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i43 [[C]]
+;
+ %NotA = xor i43 %A, -1
+ %NotB = xor i43 %B, -1
+ %C = or i43 %NotA, %NotB
+ ret i43 %C
+}
+
+; (~A | ~B) == ~(A & B)
+
+define i129 @demorgan_or_apint2(i129 %A, i129 %B) {
+; CHECK-LABEL: @demorgan_or_apint2(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = and i129 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i129 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i129 [[C]]
+;
+ %NotA = xor i129 %A, -1
+ %NotB = xor i129 %B, -1
+ %C = or i129 %NotA, %NotB
+ ret i129 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i477 @demorgan_and_apint1(i477 %A, i477 %B) {
+; CHECK-LABEL: @demorgan_and_apint1(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i477 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i477 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i477 [[C]]
+;
+ %NotA = xor i477 %A, -1
+ %NotB = xor i477 %B, -1
+ %C = and i477 %NotA, %NotB
+ ret i477 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i129 @demorgan_and_apint2(i129 %A, i129 %B) {
+; CHECK-LABEL: @demorgan_and_apint2(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i129 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i129 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i129 [[C]]
+;
+ %NotA = xor i129 %A, -1
+ %NotB = xor i129 %B, -1
+ %C = and i129 %NotA, %NotB
+ ret i129 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i65 @demorgan_and_apint3(i65 %A, i65 %B) {
+; CHECK-LABEL: @demorgan_and_apint3(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i65 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i65 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i65 [[C]]
+;
+ %NotA = xor i65 %A, -1
+ %NotB = xor i65 -1, %B
+ %C = and i65 %NotA, %NotB
+ ret i65 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i66 @demorgan_and_apint4(i66 %A, i66 %B) {
+; CHECK-LABEL: @demorgan_and_apint4(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i66 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i66 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i66 [[C]]
+;
+ %NotA = xor i66 %A, -1
+ %NotB = xor i66 %B, -1
+ %C = and i66 %NotA, %NotB
+ ret i66 %C
+}
+
+; (~A & ~B) == ~(A | B)
+
+define i47 @demorgan_and_apint5(i47 %A, i47 %B) {
+; CHECK-LABEL: @demorgan_and_apint5(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i47 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i47 [[C_DEMORGAN]], -1
+; CHECK-NEXT: ret i47 [[C]]
+;
+ %NotA = xor i47 %A, -1
+ %NotB = xor i47 %B, -1
+ %C = and i47 %NotA, %NotB
+ ret i47 %C
+}
+
+; This confirms that two transforms work together:
+; ~(~A & ~B) --> A | B
+
+define i32 @test3(i32 %A, i32 %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i32 %A, %B
+; CHECK-NEXT: ret i32 [[C_DEMORGAN]]
+;
+ %nota = xor i32 %A, -1
+ %notb = xor i32 %B, -1
+ %c = and i32 %nota, %notb
+ %notc = xor i32 %c, -1
+ ret i32 %notc
+}
+
+; Invert a constant if needed:
+; ~(~A & 5) --> A | ~5
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[NOTC1:%.*]] = or i32 %A, -6
+; CHECK-NEXT: ret i32 [[NOTC1]]
+;
+ %nota = xor i32 %A, -1
+ %c = and i32 %nota, 5
+ %notc = xor i32 %c, -1
+ ret i32 %notc
+}
+
+; Test the mirror of DeMorgan's law with an extra 'not'.
+; ~(~A | ~B) --> A & B
+
+define i32 @test5(i32 %A, i32 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = and i32 %A, %B
+; CHECK-NEXT: ret i32 [[C_DEMORGAN]]
+;
+ %nota = xor i32 %A, -1
+ %notb = xor i32 %B, -1
+ %c = or i32 %nota, %notb
+ %notc = xor i32 %c, -1
+ ret i32 %notc
+}
+
+; Repeat with weird types for extra coverage.
+; ~(~A & ~B) --> A | B
+
+define i47 @test3_apint(i47 %A, i47 %B) {
+; CHECK-LABEL: @test3_apint(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = or i47 %A, %B
+; CHECK-NEXT: ret i47 [[C_DEMORGAN]]
+;
+ %nota = xor i47 %A, -1
+ %notb = xor i47 %B, -1
+ %c = and i47 %nota, %notb
+ %notc = xor i47 %c, -1
+ ret i47 %notc
+}
+
+; ~(~A & 5) --> A | ~5
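+; (Note that this test returns %c rather than %notc, so the outer 'not' is
+; dead and only ~A & 5 is simplified, as the CHECK lines show.)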
+
+define i61 @test4_apint(i61 %A) {
+; CHECK-LABEL: @test4_apint(
+; CHECK-NEXT: [[NOTA:%.*]] = and i61 %A, 5
+; CHECK-NEXT: [[C:%.*]] = xor i61 [[NOTA]], 5
+; CHECK-NEXT: ret i61 [[C]]
+;
+ %nota = xor i61 %A, -1
+ %c = and i61 %nota, 5 ; 5 = ~c2
+ %notc = xor i61 %c, -1
+ ret i61 %c
+}
+
+; ~(~A | ~B) --> A & B
+
+define i71 @test5_apint(i71 %A, i71 %B) {
+; CHECK-LABEL: @test5_apint(
+; CHECK-NEXT: [[C_DEMORGAN:%.*]] = and i71 %A, %B
+; CHECK-NEXT: ret i71 [[C_DEMORGAN]]
+;
+ %nota = xor i71 %A, -1
+ %notb = xor i71 %B, -1
+ %c = or i71 %nota, %notb
+ %notc = xor i71 %c, -1
+ ret i71 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i8 @demorgan_nand(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nand(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = or i8 [[B_NOT]], %A
+; CHECK-NEXT: ret i8 [[NOTC]]
+;
+ %notx = xor i8 %A, -1
+ %c = and i8 %notx, %B
+ %notc = xor i8 %c, -1
+ ret i8 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i7 @demorgan_nand_apint1(i7 %A, i7 %B) {
+; CHECK-LABEL: @demorgan_nand_apint1(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i7 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = or i7 [[B_NOT]], %A
+; CHECK-NEXT: ret i7 [[NOTC]]
+;
+ %nota = xor i7 %A, -1
+ %c = and i7 %nota, %B
+ %notc = xor i7 %c, -1
+ ret i7 %notc
+}
+
+; ~(~A & B) --> (A | ~B)
+
+define i117 @demorgan_nand_apint2(i117 %A, i117 %B) {
+; CHECK-LABEL: @demorgan_nand_apint2(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i117 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = or i117 [[B_NOT]], %A
+; CHECK-NEXT: ret i117 [[NOTC]]
+;
+ %nota = xor i117 %A, -1
+ %c = and i117 %nota, %B
+ %notc = xor i117 %c, -1
+ ret i117 %notc
+}
+
+; ~(~A | B) --> (A & ~B)
+
+define i8 @demorgan_nor(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT: ret i8 [[NOTC]]
+;
+ %notx = xor i8 %A, -1
+ %c = or i8 %notx, %B
+ %notc = xor i8 %c, -1
+ ret i8 %notc
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2a(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2a(
+; CHECK-NEXT: [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT: [[USE2A:%.*]] = mul i8 [[NOTA]], 23
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2A]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nota = xor i8 %A, -1
+ %use2a = mul i8 %nota, 23
+ %c = or i8 %nota, %B
+ %notc = xor i8 %c, -1
+ %r = sdiv i8 %notc, %use2a
+ ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2b(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2b(
+; CHECK-NEXT: [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2B]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %use2b = mul i8 %B, 23
+ %nota = xor i8 %A, -1
+ %c = or i8 %nota, %B
+ %notc = xor i8 %c, -1
+ %r = sdiv i8 %notc, %use2b
+ ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use one of the intermediate results?
+
+define i8 @demorgan_nor_use2c(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2c(
+; CHECK-NEXT: [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT: [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT: [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT: [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nota = xor i8 %A, -1
+ %c = or i8 %nota, %B
+ %use2c = mul i8 %c, 23
+ %notc = xor i8 %c, -1
+ %r = sdiv i8 %notc, %use2c
+ ret i8 %r
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2ab(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2ab(
+; CHECK-NEXT: [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT: [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT: [[USE2A:%.*]] = mul i8 [[NOTA]], 17
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i8 %B, -1
+; CHECK-NEXT: [[NOTC:%.*]] = and i8 [[B_NOT]], %A
+; CHECK-NEXT: [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2B]]
+; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[R1]], [[USE2A]]
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %use2b = mul i8 %B, 23
+ %nota = xor i8 %A, -1
+ %use2a = mul i8 %nota, 17
+ %c = or i8 %nota, %B
+ %notc = xor i8 %c, -1
+ %r1 = sdiv i8 %notc, %use2b
+ %r2 = sdiv i8 %r1, %use2a
+ ret i8 %r2
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2ac(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2ac(
+; CHECK-NEXT: [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT: [[USE2A:%.*]] = mul i8 [[NOTA]], 17
+; CHECK-NEXT: [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT: [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT: [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT: [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[R1]], [[USE2A]]
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %nota = xor i8 %A, -1
+ %use2a = mul i8 %nota, 17
+ %c = or i8 %nota, %B
+ %use2c = mul i8 %c, 23
+ %notc = xor i8 %c, -1
+ %r1 = sdiv i8 %notc, %use2c
+ %r2 = sdiv i8 %r1, %use2a
+ ret i8 %r2
+}
+
+; ~(~A | B) --> (A & ~B) - what if we use two of the intermediate results?
+
+define i8 @demorgan_nor_use2bc(i8 %A, i8 %B) {
+; CHECK-LABEL: @demorgan_nor_use2bc(
+; CHECK-NEXT: [[USE2B:%.*]] = mul i8 %B, 23
+; CHECK-NEXT: [[NOTA:%.*]] = xor i8 %A, -1
+; CHECK-NEXT: [[C:%.*]] = or i8 [[NOTA]], %B
+; CHECK-NEXT: [[USE2C:%.*]] = mul i8 [[C]], 23
+; CHECK-NEXT: [[NOTC:%.*]] = xor i8 [[C]], -1
+; CHECK-NEXT: [[R1:%.*]] = sdiv i8 [[NOTC]], [[USE2C]]
+; CHECK-NEXT: [[R2:%.*]] = sdiv i8 [[R1]], [[USE2B]]
+; CHECK-NEXT: ret i8 [[R2]]
+;
+ %use2b = mul i8 %B, 23
+ %nota = xor i8 %A, -1
+ %c = or i8 %nota, %B
+ %use2c = mul i8 %c, 23
+ %notc = xor i8 %c, -1
+ %r1 = sdiv i8 %notc, %use2c
+ %r2 = sdiv i8 %r1, %use2b
+ ret i8 %r2
+}
+
+; Do not apply DeMorgan's Law to constants. We prefer 'not' ops.
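+; e.g. ~(a & 15) could be rewritten as ~a | -16, but that is still two
+; instructions and loses the canonical trailing 'not'.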
+
+define i32 @demorganize_constant1(i32 %a) {
+; CHECK-LABEL: @demorganize_constant1(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %a, 15
+; CHECK-NEXT: [[AND1:%.*]] = xor i32 [[AND]], -1
+; CHECK-NEXT: ret i32 [[AND1]]
+;
+ %and = and i32 %a, 15
+ %and1 = xor i32 %and, -1
+ ret i32 %and1
+}
+
+; Do not apply DeMorgan's Law to constants. We prefer 'not' ops.
+
+define i32 @demorganize_constant2(i32 %a) {
+; CHECK-LABEL: @demorganize_constant2(
+; CHECK-NEXT: [[AND:%.*]] = or i32 %a, 15
+; CHECK-NEXT: [[AND1:%.*]] = xor i32 [[AND]], -1
+; CHECK-NEXT: ret i32 [[AND1]]
+;
+ %and = or i32 %a, 15
+ %and1 = xor i32 %and, -1
+ ret i32 %and1
+}
+
+; PR22723: Recognize DeMorgan's Laws when obfuscated by zexts.
+
+define i32 @demorgan_or_zext(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_or_zext(
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and i1 %X, %Y
+; CHECK-NEXT: [[OR1:%.*]] = xor i1 [[OR1_DEMORGAN]], true
+; CHECK-NEXT: [[OR:%.*]] = zext i1 [[OR1]] to i32
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %or = or i32 %notX, %notY
+ ret i32 %or
+}
+
+define i32 @demorgan_and_zext(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_and_zext(
+; CHECK-NEXT: [[AND1_DEMORGAN:%.*]] = or i1 %X, %Y
+; CHECK-NEXT: [[AND1:%.*]] = xor i1 [[AND1_DEMORGAN]], true
+; CHECK-NEXT: [[AND:%.*]] = zext i1 [[AND1]] to i32
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %and = and i32 %notX, %notY
+ ret i32 %and
+}
+
+define <2 x i32> @demorgan_or_zext_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_or_zext_vec(
+; CHECK-NEXT: [[OR1_DEMORGAN:%.*]] = and <2 x i1> %X, %Y
+; CHECK-NEXT: [[OR1:%.*]] = xor <2 x i1> [[OR1_DEMORGAN]], <i1 true, i1 true>
+; CHECK-NEXT: [[OR:%.*]] = zext <2 x i1> [[OR1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %zextX = zext <2 x i1> %X to <2 x i32>
+ %zextY = zext <2 x i1> %Y to <2 x i32>
+ %notX = xor <2 x i32> %zextX, <i32 1, i32 1>
+ %notY = xor <2 x i32> %zextY, <i32 1, i32 1>
+ %or = or <2 x i32> %notX, %notY
+ ret <2 x i32> %or
+}
+
+define <2 x i32> @demorgan_and_zext_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_and_zext_vec(
+; CHECK-NEXT: [[AND1_DEMORGAN:%.*]] = or <2 x i1> %X, %Y
+; CHECK-NEXT: [[AND1:%.*]] = xor <2 x i1> [[AND1_DEMORGAN]], <i1 true, i1 true>
+; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[AND1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[AND]]
+;
+ %zextX = zext <2 x i1> %X to <2 x i32>
+ %zextY = zext <2 x i1> %Y to <2 x i32>
+ %notX = xor <2 x i32> %zextX, <i32 1, i32 1>
+ %notY = xor <2 x i32> %zextY, <i32 1, i32 1>
+ %and = and <2 x i32> %notX, %notY
+ ret <2 x i32> %and
+}
+
+define i32 @PR28476(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR28476(
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 %y, 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i1 [[CMP1]], [[CMP0]]
+; CHECK-NEXT: [[COND:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp0 = icmp ne i32 %x, 0
+ %cmp1 = icmp ne i32 %y, 0
+ %and = and i1 %cmp0, %cmp1
+ %zext = zext i1 %and to i32
+ %cond = xor i32 %zext, 1
+ ret i32 %cond
+}
+
+; ~(~(a | b) | (a & b)) --> (a | b) & ~(a & b) -> a ^ b
+
+define i32 @demorgan_plus_and_to_xor(i32 %a, i32 %b) {
+; CHECK-LABEL: @demorgan_plus_and_to_xor(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 %b, %a
+; CHECK-NEXT: ret i32 [[NOT]]
+;
+ %or = or i32 %b, %a
+ %notor = xor i32 %or, -1
+ %and = and i32 %b, %a
+ %or2 = or i32 %and, %notor
+ %not = xor i32 %or2, -1
+ ret i32 %not
+}
+
+define <4 x i32> @demorgan_plus_and_to_xor_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @demorgan_plus_and_to_xor_vec(
+; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i32> %a, %b
+; CHECK-NEXT: ret <4 x i32> [[NOT]]
+;
+ %or = or <4 x i32> %a, %b
+ %notor = xor <4 x i32> %or, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %and = and <4 x i32> %a, %b
+ %or2 = or <4 x i32> %and, %notor
+ %not = xor <4 x i32> %or2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ ret <4 x i32> %not
+}
+
diff --git a/llvm/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/disable-simplify-libcalls.ll
new file mode 100644
index 00000000000..e25ce31cbfb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -0,0 +1,335 @@
+; Test that -disable-simplify-libcalls is wired up correctly.
+;
+; RUN: opt < %s -instcombine -disable-simplify-libcalls -S | FileCheck %s
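+;
+; Each libcall below would normally be a simplification candidate; the CHECK
+; lines verify that the original calls survive -instcombine unchanged.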
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@.str = constant [1 x i8] zeroinitializer, align 1
+@.str1 = constant [13 x i8] c"hello, world\00", align 1
+@.str2 = constant [4 x i8] c"foo\00", align 1
+@.str3 = constant [4 x i8] c"bar\00", align 1
+@.str4 = constant [6 x i8] c"123.4\00", align 1
+@.str5 = constant [5 x i8] c"1234\00", align 1
+@empty = constant [1 x i8] c"\00", align 1
+
+declare double @ceil(double)
+declare double @copysign(double, double)
+declare double @cos(double)
+declare double @fabs(double)
+declare double @floor(double)
+declare i8* @strcat(i8*, i8*)
+declare i8* @strncat(i8*, i8*, i32)
+declare i8* @strchr(i8*, i32)
+declare i8* @strrchr(i8*, i32)
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i64)
+declare i8* @strcpy(i8*, i8*)
+declare i8* @stpcpy(i8*, i8*)
+declare i8* @strncpy(i8*, i8*, i64)
+declare i64 @strlen(i8*)
+declare i8* @strpbrk(i8*, i8*)
+declare i64 @strspn(i8*, i8*)
+declare double @strtod(i8*, i8**)
+declare float @strtof(i8*, i8**)
+declare x86_fp80 @strtold(i8*, i8**)
+declare i64 @strtol(i8*, i8**, i32)
+declare i64 @strtoll(i8*, i8**, i32)
+declare i64 @strtoul(i8*, i8**, i32)
+declare i64 @strtoull(i8*, i8**, i32)
+declare i64 @strcspn(i8*, i8*)
+declare i32 @abs(i32)
+declare i32 @ffs(i32)
+declare i32 @ffsl(i64)
+declare i32 @ffsll(i64)
+declare i32 @fprintf(i8*, i8*)
+declare i32 @isascii(i32)
+declare i32 @isdigit(i32)
+declare i32 @toascii(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+declare i32 @printf(i8*)
+declare i32 @sprintf(i8*, i8*)
+
+define double @t1(double %x) {
+; CHECK-LABEL: @t1(
+ %ret = call double @ceil(double %x)
+ ret double %ret
+; CHECK: call double @ceil
+}
+
+define double @t2(double %x, double %y) {
+; CHECK-LABEL: @t2(
+ %ret = call double @copysign(double %x, double %y)
+ ret double %ret
+; CHECK: call double @copysign
+}
+
+define double @t3(double %x) {
+; CHECK-LABEL: @t3(
+ %call = call double @cos(double %x)
+ ret double %call
+; CHECK: call double @cos
+}
+
+define double @t4(double %x) {
+; CHECK-LABEL: @t4(
+ %ret = call double @fabs(double %x)
+ ret double %ret
+; CHECK: call double @fabs
+}
+
+define double @t5(double %x) {
+; CHECK-LABEL: @t5(
+ %ret = call double @floor(double %x)
+ ret double %ret
+; CHECK: call double @floor
+}
+
+define i8* @t6(i8* %x) {
+; CHECK-LABEL: @t6(
+ %empty = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i8* @strcat(i8* %x, i8* %empty)
+ ret i8* %ret
+; CHECK: call i8* @strcat
+}
+
+define i8* @t7(i8* %x) {
+; CHECK-LABEL: @t7(
+ %empty = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i8* @strncat(i8* %x, i8* %empty, i32 1)
+ ret i8* %ret
+; CHECK: call i8* @strncat
+}
+
+define i8* @t8() {
+; CHECK-LABEL: @t8(
+ %x = getelementptr inbounds [13 x i8], [13 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strchr(i8* %x, i32 119)
+ ret i8* %ret
+; CHECK: call i8* @strchr
+}
+
+define i8* @t9() {
+; CHECK-LABEL: @t9(
+ %x = getelementptr inbounds [13 x i8], [13 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strrchr(i8* %x, i32 119)
+ ret i8* %ret
+; CHECK: call i8* @strrchr
+}
+
+define i32 @t10() {
+; CHECK-LABEL: @t10(
+ %x = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i8], [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i32 @strcmp(i8* %x, i8* %y)
+ ret i32 %ret
+; CHECK: call i32 @strcmp
+}
+
+define i32 @t11() {
+; CHECK-LABEL: @t11(
+ %x = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i8], [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i32 @strncmp(i8* %x, i8* %y, i64 3)
+ ret i32 %ret
+; CHECK: call i32 @strncmp
+}
+
+define i8* @t12(i8* %x) {
+; CHECK-LABEL: @t12(
+ %y = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @strcpy(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @strcpy
+}
+
+define i8* @t13(i8* %x) {
+; CHECK-LABEL: @t13(
+ %y = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @stpcpy(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @stpcpy
+}
+
+define i8* @t14(i8* %x) {
+; CHECK-LABEL: @t14(
+ %y = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @strncpy(i8* %x, i8* %y, i64 3)
+ ret i8* %ret
+; CHECK: call i8* @strncpy
+}
+
+define i64 @t15() {
+; CHECK-LABEL: @t15(
+ %x = getelementptr inbounds [4 x i8], [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i64 @strlen(i8* %x)
+ ret i64 %ret
+; CHECK: call i64 @strlen
+}
+
+define i8* @t16(i8* %x) {
+; CHECK-LABEL: @t16(
+ %y = getelementptr inbounds [1 x i8], [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i8* @strpbrk(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @strpbrk
+}
+
+define i64 @t17(i8* %x) {
+; CHECK-LABEL: @t17(
+ %y = getelementptr inbounds [1 x i8], [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i64 @strspn(i8* %x, i8* %y)
+ ret i64 %ret
+; CHECK: call i64 @strspn
+}
+
+define double @t18(i8** %y) {
+; CHECK-LABEL: @t18(
+ %x = getelementptr inbounds [6 x i8], [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call double @strtod(i8* %x, i8** %y)
+ ret double %ret
+; CHECK: call double @strtod
+}
+
+define float @t19(i8** %y) {
+; CHECK-LABEL: @t19(
+ %x = getelementptr inbounds [6 x i8], [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call float @strtof(i8* %x, i8** %y)
+ ret float %ret
+; CHECK: call float @strtof
+}
+
+define x86_fp80 @t20(i8** %y) {
+; CHECK-LABEL: @t20(
+ %x = getelementptr inbounds [6 x i8], [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call x86_fp80 @strtold(i8* %x, i8** %y)
+ ret x86_fp80 %ret
+; CHECK: call x86_fp80 @strtold
+}
+
+define i64 @t21(i8** %y) {
+; CHECK-LABEL: @t21(
+ %x = getelementptr inbounds [5 x i8], [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtol(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtol
+}
+
+define i64 @t22(i8** %y) {
+; CHECK-LABEL: @t22(
+ %x = getelementptr inbounds [5 x i8], [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoll(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoll
+}
+
+define i64 @t23(i8** %y) {
+; CHECK-LABEL: @t23(
+ %x = getelementptr inbounds [5 x i8], [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoul(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoul
+}
+
+define i64 @t24(i8** %y) {
+; CHECK-LABEL: @t24(
+ %x = getelementptr inbounds [5 x i8], [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoull(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoull
+}
+
+define i64 @t25(i8* %y) {
+; CHECK-LABEL: @t25(
+ %x = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i64 @strcspn(i8* %x, i8* %y)
+ ret i64 %ret
+; CHECK: call i64 @strcspn
+}
+
+define i32 @t26(i32 %y) {
+; CHECK-LABEL: @t26(
+ %ret = call i32 @abs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @abs
+}
+
+define i32 @t27(i32 %y) {
+; CHECK-LABEL: @t27(
+ %ret = call i32 @ffs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffs
+}
+
+define i32 @t28(i64 %y) {
+; CHECK-LABEL: @t28(
+ %ret = call i32 @ffsl(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsl
+}
+
+define i32 @t29(i64 %y) {
+; CHECK-LABEL: @t29(
+ %ret = call i32 @ffsll(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsll
+}
+
+define void @t30() {
+; CHECK-LABEL: @t30(
+ %x = getelementptr inbounds [13 x i8], [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @fprintf(i8* null, i8* %x)
+ ret void
+; CHECK: call i32 @fprintf
+}
+
+define i32 @t31(i32 %y) {
+; CHECK-LABEL: @t31(
+ %ret = call i32 @isascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isascii
+}
+
+define i32 @t32(i32 %y) {
+; CHECK-LABEL: @t32(
+ %ret = call i32 @isdigit(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isdigit
+}
+
+define i32 @t33(i32 %y) {
+; CHECK-LABEL: @t33(
+ %ret = call i32 @toascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @toascii
+}
+
+define i64 @t34(i64 %y) {
+; CHECK-LABEL: @t34(
+ %ret = call i64 @labs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @labs
+}
+
+define i64 @t35(i64 %y) {
+; CHECK-LABEL: @t35(
+ %ret = call i64 @llabs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @llabs
+}
+
+define void @t36() {
+; CHECK-LABEL: @t36(
+ %x = getelementptr inbounds [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i32 @printf(i8* %x)
+ ret void
+; CHECK: call i32 @printf
+}
+
+define void @t37(i8* %x) {
+; CHECK-LABEL: @t37(
+ %y = getelementptr inbounds [13 x i8], [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @sprintf(i8* %x, i8* %y)
+ ret void
+; CHECK: call i32 @sprintf
+}
diff --git a/llvm/test/Transforms/InstCombine/distribute.ll b/llvm/test/Transforms/InstCombine/distribute.ll
new file mode 100644
index 00000000000..e6360f8ba64
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/distribute.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
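+
+; Factorization and expansion of bitwise/arithmetic expressions using the
+; distributive laws; the per-test comments spell out the expected algebra.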
+
+define i32 @factorize(i32 %x, i32 %y) {
+; CHECK-LABEL: @factorize(
+; (X | 1) & (X | 2) -> X | (1 & 2) -> X
+ %l = or i32 %x, 1
+ %r = or i32 %x, 2
+ %z = and i32 %l, %r
+ ret i32 %z
+; CHECK: ret i32 %x
+}
+
+define i32 @factorize2(i32 %x) {
+; CHECK-LABEL: @factorize2(
+; 3*X - 2*X -> X
+ %l = mul i32 3, %x
+ %r = mul i32 2, %x
+ %z = sub i32 %l, %r
+ ret i32 %z
+; CHECK: ret i32 %x
+}
+
+define i32 @factorize3(i32 %x, i32 %a, i32 %b) {
+; CHECK-LABEL: @factorize3(
+; (X | (A|B)) & (X | B) -> X | ((A|B) & B) -> X | B
+ %aORb = or i32 %a, %b
+ %l = or i32 %x, %aORb
+ %r = or i32 %x, %b
+ %z = and i32 %l, %r
+ ret i32 %z
+; CHECK: %z = or i32 %b, %x
+; CHECK: ret i32 %z
+}
+
+define i32 @factorize4(i32 %x, i32 %y) {
+; CHECK-LABEL: @factorize4(
+; ((Y << 1) * X) - (X * Y) -> (X * (Y * 2 - Y)) -> (X * Y)
+ %sh = shl i32 %y, 1
+ %ml = mul i32 %sh, %x
+ %mr = mul i32 %x, %y
+ %s = sub i32 %ml, %mr
+ ret i32 %s
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
+}
+
+define i32 @factorize5(i32 %x, i32 %y) {
+; CHECK-LABEL: @factorize5(
+; ((Y * 2) * X) - (X * Y) -> (X * Y)
+ %sh = mul i32 %y, 2
+ %ml = mul i32 %sh, %x
+ %mr = mul i32 %x, %y
+ %s = sub i32 %ml, %mr
+ ret i32 %s
+; CHECK: %s = mul i32 %y, %x
+; CHECK: ret i32 %s
+}
+
+define i32 @expand(i32 %x) {
+; CHECK-LABEL: @expand(
+; ((X & 1) | 2) & 1 -> ((X & 1) & 1) | (2 & 1) -> (X & 1) | 0 -> X & 1
+ %a = and i32 %x, 1
+ %b = or i32 %a, 2
+ %c = and i32 %b, 1
+ ret i32 %c
+; CHECK: %a = and i32 %x, 1
+; CHECK: ret i32 %a
+}
diff --git a/llvm/test/Transforms/InstCombine/div-shift-crash.ll b/llvm/test/Transforms/InstCombine/div-shift-crash.ll
new file mode 100644
index 00000000000..936173cd6d4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/div-shift-crash.ll
@@ -0,0 +1,101 @@
+; RUN: opt -instcombine < %s
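+; Reduced reproducer: this only checks that -instcombine does not crash, so
+; there are no FileCheck assertions.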
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129 = type <{ i64 }>
+
+; Function Attrs: nounwind
+define void @main() #0 {
+entry:
+ %l_819.i.i = alloca %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129, align 8
+ br i1 undef, label %land.lhs.true, label %for.cond.i
+
+land.lhs.true: ; preds = %entry
+ br label %for.cond.i
+
+for.cond.i: ; preds = %land.lhs.true, %entry
+ %0 = getelementptr inbounds %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129, %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129* %l_819.i.i, i64 0, i32 0
+ br label %for.cond.i6.i.i
+
+for.cond.i6.i.i: ; preds = %for.body.i8.i.i, %for.cond.i
+ br i1 undef, label %for.body.i8.i.i, label %lbl_707.i.i.i
+
+for.body.i8.i.i: ; preds = %for.cond.i6.i.i
+ br label %for.cond.i6.i.i
+
+lbl_707.i.i.i: ; preds = %for.cond.i6.i.i
+ br i1 undef, label %lor.rhs.i.i.i, label %lor.end.i.i.i
+
+lor.rhs.i.i.i: ; preds = %lbl_707.i.i.i
+ br label %lor.end.i.i.i
+
+lor.end.i.i.i: ; preds = %lor.rhs.i.i.i, %lbl_707.i.i.i
+ br label %for.cond1.i.i.i.i
+
+for.cond1.i.i.i.i: ; preds = %for.body4.i.i.i.i, %lor.end.i.i.i
+ br i1 undef, label %for.body4.i.i.i.i, label %func_39.exit.i.i
+
+for.body4.i.i.i.i: ; preds = %for.cond1.i.i.i.i
+ br label %for.cond1.i.i.i.i
+
+func_39.exit.i.i: ; preds = %for.cond1.i.i.i.i
+ %l_8191.sroa.0.0.copyload.i.i = load i64, i64* %0, align 1
+ br label %for.cond1.i.i.i
+
+for.cond1.i.i.i: ; preds = %safe_div_func_uint32_t_u_u.exit.i.i.i, %func_39.exit.i.i
+ br i1 undef, label %for.cond7.i.i.i, label %func_11.exit.i
+
+for.cond7.i.i.i: ; preds = %for.end30.i.i.i, %for.cond1.i.i.i
+ %storemerge.i.i.i = phi i32 [ %sub.i.i.i, %for.end30.i.i.i ], [ 4, %for.cond1.i.i.i ]
+ br i1 undef, label %for.cond22.i.i.i, label %for.end32.i.i.i
+
+for.cond22.i.i.i: ; preds = %for.body25.i.i.i, %for.cond7.i.i.i
+ br i1 undef, label %for.body25.i.i.i, label %for.end30.i.i.i
+
+for.body25.i.i.i: ; preds = %for.cond22.i.i.i
+ br label %for.cond22.i.i.i
+
+for.end30.i.i.i: ; preds = %for.cond22.i.i.i
+ %sub.i.i.i = add nsw i32 0, -1
+ br label %for.cond7.i.i.i
+
+for.end32.i.i.i: ; preds = %for.cond7.i.i.i
+ %conv33.i.i.i = trunc i64 %l_8191.sroa.0.0.copyload.i.i to i32
+ %xor.i.i.i.i = xor i32 %storemerge.i.i.i, -701565022
+ %sub.i.i.i.i = sub nsw i32 0, %storemerge.i.i.i
+ %xor3.i.i.i.i = xor i32 %sub.i.i.i.i, %storemerge.i.i.i
+ %and4.i.i.i.i = and i32 %xor.i.i.i.i, %xor3.i.i.i.i
+ %cmp.i.i.i.i = icmp slt i32 %and4.i.i.i.i, 0
+ %sub5.i.i.i.i = sub nsw i32 -701565022, %storemerge.i.i.i
+ %.sub5.i.i.i.i = select i1 %cmp.i.i.i.i, i32 -701565022, i32 %sub5.i.i.i.i
+ br i1 undef, label %safe_div_func_uint32_t_u_u.exit.i.i.i, label %cond.false.i.i.i.i
+
+cond.false.i.i.i.i: ; preds = %for.end32.i.i.i
+ %div.i.i.i.i = udiv i32 %conv33.i.i.i, %.sub5.i.i.i.i
+ br label %safe_div_func_uint32_t_u_u.exit.i.i.i
+
+safe_div_func_uint32_t_u_u.exit.i.i.i: ; preds = %cond.false.i.i.i.i, %for.end32.i.i.i
+ %cond.i.i.i.i = phi i32 [ %div.i.i.i.i, %cond.false.i.i.i.i ], [ %conv33.i.i.i, %for.end32.i.i.i ]
+ %cmp35.i.i.i = icmp ne i32 %cond.i.i.i.i, -7
+ br label %for.cond1.i.i.i
+
+func_11.exit.i: ; preds = %for.cond1.i.i.i
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %func_11.exit.i
+ unreachable
+
+for.end: ; preds = %func_11.exit.i
+ br label %for.cond15
+
+for.cond15: ; preds = %for.cond19, %for.end
+ br i1 undef, label %for.cond19, label %for.end45
+
+for.cond19: ; preds = %for.cond15
+ br label %for.cond15
+
+for.end45: ; preds = %for.cond15
+ unreachable
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll
new file mode 100644
index 00000000000..7d84fd6fdd3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/div-shift.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @t1(i16 zeroext %x, i32 %y) {
+; CHECK-LABEL: @t1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[D:%.*]] = lshr i32 [[CONV]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+entry:
+ %conv = zext i16 %x to i32
+ %s = shl i32 2, %y
+ %d = sdiv i32 %conv, %s
+ ret i32 %d
+}
+
+define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) {
+; CHECK-LABEL: @t1vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[D:%.*]] = lshr <2 x i32> [[CONV]], [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+entry:
+ %conv = zext <2 x i16> %x to <2 x i32>
+ %s = shl <2 x i32> <i32 2, i32 2>, %y
+ %d = sdiv <2 x i32> %conv, %s
+ ret <2 x i32> %d
+}
+
+; rdar://11721329
+define i64 @t2(i64 %x, i32 %y) {
+; CHECK-LABEL: @t2(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[X:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = shl i32 1, %y
+ %2 = zext i32 %1 to i64
+ %3 = udiv i64 %x, %2
+ ret i64 %3
+}
+
+; PR13250
+define i64 @t3(i64 %x, i32 %y) {
+; CHECK-LABEL: @t3(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[X:%.*]], [[TMP2]]
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %1 = shl i32 4, %y
+ %2 = zext i32 %1 to i64
+ %3 = udiv i64 %x, %2
+ ret i64 %3
+}
+
+define i32 @t4(i32 %x, i32 %y) {
+; CHECK-LABEL: @t4(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], 5
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 5
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X:%.*]], [[DOTV]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %1 = shl i32 1, %y
+ %2 = icmp ult i32 %1, 32
+ %3 = select i1 %2, i32 32, i32 %1
+ %4 = udiv i32 %x, %3
+ ret i32 %4
+}
+
+define i32 @t5(i1 %x, i1 %y, i32 %V) {
+; CHECK-LABEL: @t5(
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[X:%.*]], i32 5, i32 6
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[V:%.*]], [[DOTV]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[Y:%.*]], i32 [[TMP1]], i32 0
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %1 = shl i32 1, %V
+ %2 = select i1 %x, i32 32, i32 64
+ %3 = select i1 %y, i32 %2, i32 %1
+ %4 = udiv i32 %V, %3
+ ret i32 %4
+}
+
+define i32 @t6(i32 %x, i32 %z) {
+; CHECK-LABEL: @t6(
+; CHECK-NEXT: [[X_IS_ZERO:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[DIVISOR:%.*]] = select i1 [[X_IS_ZERO]], i32 1, i32 [[X]]
+; CHECK-NEXT: [[Y:%.*]] = udiv i32 [[Z:%.*]], [[DIVISOR]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %x_is_zero = icmp eq i32 %x, 0
+ %divisor = select i1 %x_is_zero, i32 1, i32 %x
+ %y = udiv i32 %z, %divisor
+ ret i32 %y
+}
+
+; (X << C1) / X -> 1 << C1 optimizations
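+; The shift needs the matching no-wrap flag (nsw for sdiv, nuw for udiv);
+; t8 and t13 check that the fold is skipped when the flag is missing.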
+
+define i32 @t7(i32 %x) {
+; CHECK-LABEL: @t7(
+; CHECK-NEXT: ret i32 4
+;
+ %shl = shl nsw i32 %x, 2
+ %r = sdiv i32 %shl, %x
+ ret i32 %r
+}
+
+; Make sure the previous fold doesn't take place when the shift lacks the 'nsw' flag.
+
+define i32 @t8(i32 %x) {
+; CHECK-LABEL: @t8(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[SHL]], [[X]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shl = shl i32 %x, 2
+ %r = sdiv i32 %shl, %x
+ ret i32 %r
+}
+
+define <2 x i32> @t9(<2 x i32> %x) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT: ret <2 x i32> <i32 4, i32 8>
+;
+ %shl = shl nsw <2 x i32> %x, <i32 2, i32 3>
+ %r = sdiv <2 x i32> %shl, %x
+ ret <2 x i32> %r
+}
+
+define i32 @t10(i32 %x, i32 %y) {
+; CHECK-LABEL: @t10(
+; CHECK-NEXT: [[R:%.*]] = shl nsw i32 1, [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shl = shl nsw i32 %x, %y
+ %r = sdiv i32 %shl, %x
+ ret i32 %r
+}
+
+define <2 x i32> @t11(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @t11(
+; CHECK-NEXT: [[R:%.*]] = shl nsw <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %shl = shl nsw <2 x i32> %x, %y
+ %r = sdiv <2 x i32> %shl, %x
+ ret <2 x i32> %r
+}
+
+define i32 @t12(i32 %x) {
+; CHECK-LABEL: @t12(
+; CHECK-NEXT: ret i32 4
+;
+ %shl = shl nuw i32 %x, 2
+ %r = udiv i32 %shl, %x
+ ret i32 %r
+}
+
+; Make sure the previous fold doesn't take place when the shift lacks the 'nuw' flag.
+
+define i32 @t13(i32 %x) {
+; CHECK-LABEL: @t13(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = udiv i32 [[SHL]], [[X]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shl = shl i32 %x, 2
+ %r = udiv i32 %shl, %x
+ ret i32 %r
+}
+
+define <2 x i32> @t14(<2 x i32> %x) {
+; CHECK-LABEL: @t14(
+; CHECK-NEXT: ret <2 x i32> <i32 4, i32 8>
+;
+ %shl = shl nuw <2 x i32> %x, <i32 2, i32 3>
+ %r = udiv <2 x i32> %shl, %x
+ ret <2 x i32> %r
+}
+
+define i32 @t15(i32 %x, i32 %y) {
+; CHECK-LABEL: @t15(
+; CHECK-NEXT: [[R:%.*]] = shl nuw i32 1, [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shl = shl nuw i32 %x, %y
+ %r = udiv i32 %shl, %x
+ ret i32 %r
+}
+
+define <2 x i32> @t16(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @t16(
+; CHECK-NEXT: [[R:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %shl = shl nuw <2 x i32> %x, %y
+ %r = udiv <2 x i32> %shl, %x
+ ret <2 x i32> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
new file mode 100644
index 00000000000..4c4308151e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -0,0 +1,1049 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that div instructions are properly eliminated.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = sdiv i32 %A, 1
+ ret i32 %B
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = udiv i32 %A, 8
+ ret i32 %B
+}
+
+define i32 @sdiv_by_minus1(i32 %A) {
+; CHECK-LABEL: @sdiv_by_minus1(
+; CHECK-NEXT: [[B:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = sdiv i32 %A, -1
+ ret i32 %B
+}
+
+define <2 x i64> @sdiv_by_minus1_vec(<2 x i64> %x) {
+; CHECK-LABEL: @sdiv_by_minus1_vec(
+; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %div = sdiv <2 x i64> %x, <i64 -1, i64 -1>
+ ret <2 x i64> %div
+}
+
+define <2 x i64> @sdiv_by_minus1_vec_undef_elt(<2 x i64> %x) {
+; CHECK-LABEL: @sdiv_by_minus1_vec_undef_elt(
+; CHECK-NEXT: ret <2 x i64> undef
+;
+ %div = sdiv <2 x i64> %x, <i64 -1, i64 undef>
+ ret <2 x i64> %div
+}
+
+define i32 @sdiv_by_sext_minus1(i1 %x, i32 %y) {
+; CHECK-LABEL: @sdiv_by_sext_minus1(
+; CHECK-NEXT: [[DIV:%.*]] = sub i32 0, [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %sext = sext i1 %x to i32
+ %div = sdiv i32 %y, %sext
+ ret i32 %div
+}
+
+define <2 x i32> @sdiv_by_sext_minus1_vec(<2 x i1> %x, <2 x i32> %y) {
+; CHECK-LABEL: @sdiv_by_sext_minus1_vec(
+; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[DIV]]
+;
+ %sext = sext <2 x i1> %x to <2 x i32>
+ %div = sdiv <2 x i32> %y, %sext
+ ret <2 x i32> %div
+}
+
+define i8 @udiv_by_negative(i8 %x) {
+; CHECK-LABEL: @udiv_by_negative(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -7
+; CHECK-NEXT: [[A:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[A]]
+;
+ %A = udiv i8 %x, 250
+ ret i8 %A
+}
+
+define i32 @udiv_by_minus1(i32 %A) {
+; CHECK-LABEL: @udiv_by_minus1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[A:%.*]], -1
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = udiv i32 %A, -1
+ ret i32 %B
+}
+
+define <2 x i64> @udiv_by_minus1_vec(<2 x i64> %x) {
+; CHECK-LABEL: @udiv_by_minus1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %div = udiv <2 x i64> %x, <i64 -1, i64 -1>
+ ret <2 x i64> %div
+}
+
+define i32 @udiv_by_sext_all_ones(i1 %x, i32 %y) {
+; CHECK-LABEL: @udiv_by_sext_all_ones(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[DIV:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %sext = sext i1 %x to i32
+ %div = udiv i32 %y, %sext
+ ret i32 %div
+}
+
+define <2 x i32> @udiv_by_sext_all_ones_vec(<2 x i1> %x, <2 x i32> %y) {
+; CHECK-LABEL: @udiv_by_sext_all_ones_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[DIV]]
+;
+ %sext = sext <2 x i1> %x to <2 x i32>
+ %div = udiv <2 x i32> %y, %sext
+ ret <2 x i32> %div
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i32 0
+;
+ %B = udiv i32 %A, -16
+ %C = udiv i32 %B, -4
+ ret i32 %C
+}
+
+define i1 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], 123
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = udiv i32 %A, 123
+ ; A < 123
+ %C = icmp eq i32 %B, 0
+ ret i1 %C
+}
+
+define i1 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 10
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = udiv i32 %A, 10
+ ; A >= 20 && A < 30
+ %C = icmp eq i32 %B, 2
+ ret i1 %C
+}
+
+define <2 x i1> @test7vec(<2 x i32> %A) {
+; CHECK-LABEL: @test7vec(
+; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], <i32 -20, i32 -20>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A_OFF]], <i32 10, i32 10>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %B = udiv <2 x i32> %A, <i32 10, i32 10>
+ %C = icmp eq <2 x i32> %B, <i32 2, i32 2>
+ ret <2 x i1> %C
+}
+
+define i1 @test8(i8 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[A:%.*]], -11
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = udiv i8 %A, 123
+ ; A >= 246
+ %C = icmp eq i8 %B, 2
+ ret i1 %C
+}
+
+define <2 x i1> @test8vec(<2 x i8> %A) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], <i8 -11, i8 -11>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = udiv <2 x i8> %A, <i8 123, i8 123>
+ %C = icmp eq <2 x i8> %B, <i8 2, i8 2>
+ ret <2 x i1> %C
+}
+
+define i1 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[A:%.*]], -10
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = udiv i8 %A, 123
+ ; A < 246
+ %C = icmp ne i8 %B, 2
+ ret i1 %C
+}
+
+define <2 x i1> @test9vec(<2 x i8> %A) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], <i8 -10, i8 -10>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = udiv <2 x i8> %A, <i8 123, i8 123>
+ %C = icmp ne <2 x i8> %B, <i8 2, i8 2>
+ ret <2 x i1> %C
+}
+
+define i32 @test10(i32 %X, i1 %C) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[R_V:%.*]] = select i1 [[C:%.*]], i32 6, i32 3
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], [[R_V]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %V = select i1 %C, i32 64, i32 8
+ %R = udiv i32 %X, %V
+ ret i32 %R
+}
+
+define i32 @test11(i32 %X, i1 %C) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B_V:%.*]] = select i1 [[C:%.*]], i32 10, i32 5
+; CHECK-NEXT: [[B:%.*]] = lshr i32 [[X:%.*]], [[B_V]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = select i1 %C, i32 1024, i32 32
+ %B = udiv i32 %X, %A
+ ret i32 %B
+}
+
+; PR2328
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i32 1
+;
+ %tmp3 = udiv i32 %x, %x ; 1
+ ret i32 %tmp3
+}
+
+define i32 @test13(i32 %x) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i32 1
+;
+ %tmp3 = sdiv i32 %x, %x ; 1
+ ret i32 %tmp3
+}
+
+define i32 @test14(i8 %x) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret i32 0
+;
+ %zext = zext i8 %x to i32
+ %div = udiv i32 %zext, 257 ; 0
+ ret i32 %div
+}
+
+; PR9814
+define i32 @test15(i32 %a, i32 %b) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], -2
+; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
+ %shl = shl i32 1, %b
+ %div = lshr i32 %shl, 2
+ %div2 = udiv i32 %a, %div
+ ret i32 %div2
+}
+
+define <2 x i64> @test16(<2 x i64> %x) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %shr = lshr <2 x i64> %x, <i64 5, i64 5>
+ %div = udiv <2 x i64> %shr, <i64 6, i64 6>
+ ret <2 x i64> %div
+}
+
+define i32 @test19(i32 %x) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[A:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %A = udiv i32 1, %x
+ ret i32 %A
+}
+
+define <2 x i32> @test19vec(<2 x i32> %x) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %A = udiv <2 x i32> <i32 1, i32 1>, %x
+ ret <2 x i32> %A
+}
+
+define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT: [[A:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 0
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %A = sdiv i32 1, %x
+ ret i32 %A
+}
+
+define <2 x i32> @test20vec(<2 x i32> %x) {
+; CHECK-LABEL: @test20vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %A = sdiv <2 x i32> <i32 1, i32 1>, %x
+ ret <2 x i32> %A
+}
+
+define i32 @test21(i32 %a) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %shl = shl nsw i32 %a, 2
+ %div = sdiv i32 %shl, 12
+ ret i32 %div
+}
+
+define i32 @test22(i32 %a) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], 4
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %mul = mul nsw i32 %a, 3
+ %div = sdiv i32 %mul, 12
+ ret i32 %div
+}
+
+define i32 @test23(i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %shl = shl nuw i32 %a, 2
+ %div = udiv i32 %shl, 12
+ ret i32 %div
+}
+
+define i32 @test24(i32 %a) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[A:%.*]], 2
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %mul = mul nuw i32 %a, 3
+ %div = udiv i32 %mul, 12
+ ret i32 %div
+}
+
+define i32 @test25(i32 %a) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[DIV:%.*]] = shl nsw i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %shl = shl nsw i32 %a, 2
+ %div = sdiv i32 %shl, 2
+ ret i32 %div
+}
+
+define i32 @test26(i32 %a) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[DIV:%.*]] = shl nsw i32 [[A:%.*]], 2
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %mul = mul nsw i32 %a, 12
+ %div = sdiv i32 %mul, 3
+ ret i32 %div
+}
+
+define i32 @test27(i32 %a) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[DIV:%.*]] = shl nuw i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %shl = shl nuw i32 %a, 2
+ %div = udiv i32 %shl, 2
+ ret i32 %div
+}
+
+define i32 @test28(i32 %a) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[DIV:%.*]] = mul nuw i32 [[A:%.*]], 12
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %mul = mul nuw i32 %a, 36
+ %div = udiv i32 %mul, 3
+ ret i32 %div
+}
+
+define i32 @test29(i32 %a) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[MUL_LOBIT:%.*]] = and i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[MUL_LOBIT]]
+;
+ %mul = shl nsw i32 %a, 31
+ %div = sdiv i32 %mul, -2147483648
+ ret i32 %div
+}
+
+define i32 @test30(i32 %a) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %mul = shl nuw i32 %a, 31
+ %div = udiv i32 %mul, -2147483648
+ ret i32 %div
+}
+
+define <2 x i32> @test31(<2 x i32> %x) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+ %shr = lshr <2 x i32> %x, <i32 31, i32 31>
+ %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %div
+}
+
+define i32 @test32(i32 %a, i32 %b) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[B:%.*]]
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL]], 2
+; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
+ %shl = shl i32 2, %b
+ %div = lshr i32 %shl, 2
+ %div2 = udiv i32 %a, %div
+ ret i32 %div2
+}
+
+define <2 x i64> @test33(<2 x i64> %x) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %shr = lshr exact <2 x i64> %x, <i64 5, i64 5>
+ %div = udiv exact <2 x i64> %shr, <i64 6, i64 6>
+ ret <2 x i64> %div
+}
+
+; -X / C --> X / -C (if negation does not overflow)
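+; (For i8, C = -128 cannot be negated; the *_smin tests below use a
+; different fold for that case.)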
+
+define i8 @sdiv_negated_dividend_constant_divisor(i8 %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor(
+; CHECK-NEXT: [[D:%.*]] = sdiv i8 [[X:%.*]], 42
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %neg = sub nsw i8 0, %x
+ %d = sdiv i8 %neg, -42
+ ret i8 %d
+}
+
+define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat(
+; CHECK-NEXT: [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT: ret <2 x i8> [[D]]
+;
+ %neg = sub nsw <2 x i8> zeroinitializer, %x
+ %d = sdiv <2 x i8> %neg, <i8 -42, i8 -42>
+ ret <2 x i8> %d
+}
+
+define i8 @sdiv_exact_negated_dividend_constant_divisor(i8 %x) {
+; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor(
+; CHECK-NEXT: [[D:%.*]] = sdiv exact i8 [[X:%.*]], 42
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %neg = sub nsw i8 0, %x
+ %d = sdiv exact i8 %neg, -42
+ ret i8 %d
+}
+
+define <2 x i8> @sdiv_exact_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec_splat(
+; CHECK-NEXT: [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT: ret <2 x i8> [[D]]
+;
+ %neg = sub nsw <2 x i8> zeroinitializer, %x
+ %d = sdiv exact <2 x i8> %neg, <i8 -42, i8 -42>
+ ret <2 x i8> %d
+}
+
+define i8 @sdiv_negated_dividend_constant_divisor_smin(i8 %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_smin(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128
+; CHECK-NEXT: [[D:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %neg = sub nsw i8 0, %x
+ %d = sdiv i8 %neg, -128
+ ret i8 %d
+}
+
+define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat_smin(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat_smin(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[D]]
+;
+ %neg = sub nsw <2 x i8> zeroinitializer, %x
+ %d = sdiv <2 x i8> %neg, <i8 -128, i8 -128>
+ ret <2 x i8> %d
+}
+
+define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_undef(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_undef(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %neg = sub nsw <2 x i8> zeroinitializer, %x
+ %d = sdiv <2 x i8> %neg, <i8 -128, i8 undef>
+ ret <2 x i8> %d
+}
+
+define <2 x i64> @sdiv_negated_dividend_constant_divisor_vec(<2 x i64> %x) {
+; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec(
+; CHECK-NEXT: [[DIV1:%.*]] = sdiv <2 x i64> [[X:%.*]], <i64 3, i64 4>
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i64> zeroinitializer, [[DIV1]]
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %neg = sub nsw <2 x i64> zeroinitializer, %x
+ %div = sdiv <2 x i64> %neg, <i64 3, i64 4>
+ ret <2 x i64> %div
+}
+
+define <2 x i64> @sdiv_exact_negated_dividend_constant_divisor_vec(<2 x i64> %x) {
+; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec(
+; CHECK-NEXT: [[DIV1:%.*]] = sdiv exact <2 x i64> [[X:%.*]], <i64 3, i64 4>
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i64> zeroinitializer, [[DIV1]]
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
+ %neg = sub nsw <2 x i64> zeroinitializer, %x
+ %div = sdiv exact <2 x i64> %neg, <i64 3, i64 4>
+ ret <2 x i64> %div
+}
+
+; Can't negate signed min vector element.
+
+define <2 x i8> @sdiv_exact_negated_dividend_constant_divisor_vec_overflow(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec_overflow(
+; CHECK-NEXT: [[DIV1:%.*]] = sdiv exact <2 x i8> [[X:%.*]], <i8 -128, i8 42>
+; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV1]]
+; CHECK-NEXT: ret <2 x i8> [[DIV]]
+;
+ %neg = sub nsw <2 x i8> zeroinitializer, %x
+ %div = sdiv exact <2 x i8> %neg, <i8 -128, i8 42>
+ ret <2 x i8> %div
+}
+
+define i32 @test35(i32 %A) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = udiv exact i32 [[AND]], 2147483647
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %and = and i32 %A, 2147483647
+ %mul = sdiv exact i32 %and, 2147483647
+ ret i32 %mul
+}
+
+define <2 x i32> @test35vec(<2 x i32> %A) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %mul = sdiv exact <2 x i32> %and, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %mul
+}
+
+define i32 @test36(i32 %A) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = lshr exact i32 [[AND]], [[A]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %and = and i32 %A, 2147483647
+ %shl = shl nsw i32 1, %A
+ %mul = sdiv exact i32 %and, %shl
+ ret i32 %mul
+}
+
+define <2 x i32> @test36vec(<2 x i32> %A) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]]
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %shl = shl nsw <2 x i32> <i32 1, i32 1>, %A
+ %mul = sdiv exact <2 x i32> %and, %shl
+ ret <2 x i32> %mul
+}
+
+define i32 @test37(i32* %b) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 0, i32* [[B:%.*]], align 4
+; CHECK-NEXT: br i1 undef, label [[LOR_RHS:%.*]], label [[LOR_END:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ store i32 0, i32* %b, align 4
+ %0 = load i32, i32* %b, align 4
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %entry
+ %mul = mul nsw i32 undef, %0
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %entry
+ %t.0 = phi i32 [ %0, %entry ], [ %mul, %lor.rhs ]
+ %div = sdiv i32 %t.0, 2
+ ret i32 %div
+}
+
+; We can perform the division in the smaller type.
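+; The dividend is a sign-extended i8 and the divisor 127 fits in i8, so the
+; sdiv can be done in i8 and the result sign-extended afterwards.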
+
+define i32 @shrink(i8 %x) {
+; CHECK-LABEL: @shrink(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv i8 [[X:%.*]], 127
+; CHECK-NEXT: [[DIV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, 127
+ ret i32 %div
+}
+
+; Division in the smaller type can lead to more optimizations.
+
+define i32 @zap(i8 %x) {
+; CHECK-LABEL: @zap(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128
+; CHECK-NEXT: [[DIV:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, -128
+ ret i32 %div
+}
+
+; Splat constant divisors should get the same folds.
+
+define <3 x i32> @shrink_vec(<3 x i8> %x) {
+; CHECK-LABEL: @shrink_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 127, i8 127, i8 127>
+; CHECK-NEXT: [[DIV:%.*]] = sext <3 x i8> [[TMP1]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[DIV]]
+;
+ %conv = sext <3 x i8> %x to <3 x i32>
+ %div = sdiv <3 x i32> %conv, <i32 127, i32 127, i32 127>
+ ret <3 x i32> %div
+}
+
+define <2 x i32> @zap_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zap_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[DIV]]
+;
+ %conv = sext <2 x i8> %x to <2 x i32>
+ %div = sdiv <2 x i32> %conv, <i32 -128, i32 -128>
+ ret <2 x i32> %div
+}
+
+; But we can't do this if the signed constant won't fit in the original type.
+
+define i32 @shrink_no(i8 %x) {
+; CHECK-LABEL: @shrink_no(
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[CONV]], 128
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, 128
+ ret i32 %div
+}
+
+; When the divisor is known to be larger in magnitude than the dividend,
+; InstSimplify should kill it before InstCombine sees it.
+
+define i32 @shrink_no2(i8 %x) {
+; CHECK-LABEL: @shrink_no2(
+; CHECK-NEXT: ret i32 0
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, -129
+ ret i32 %div
+}
+
+define i32 @shrink_no3(i16 %x) {
+; CHECK-LABEL: @shrink_no3(
+; CHECK-NEXT: ret i32 0
+;
+ %conv = sext i16 %x to i32
+ %div = sdiv i32 %conv, 65535
+ ret i32 %div
+}
+
+; This previously crashed when trying to simplify the zext/icmp this becomes.
+define <2 x i8> @PR34841(<2 x i8> %x) {
+; CHECK-LABEL: @PR34841(
+; CHECK-NEXT: ret <2 x i8> zeroinitializer
+;
+ %neg = and <2 x i8> %x, <i8 2, i8 2>
+ %div = udiv <2 x i8> <i8 1, i8 1>, %neg
+ ret <2 x i8> %div
+}
+
+; X / (X * Y) -> 1 / Y if the multiplication does not overflow
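+; For sdiv, the resulting 1 / Y is folded further into a compare-and-select
+; (Y when Y is +/-1, 0 otherwise), as the CHECK lines show.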
+
+define i8 @div_factor_signed(i8 %x, i8 %y) {
+; CHECK-LABEL: @div_factor_signed(
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i8 [[Y]], i8 0
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = mul nsw i8 %x, %y
+ %r = sdiv i8 %x, %a
+ ret i8 %r
+}
+
+; X / (Y * X) -> 1 / Y if the multiplication does not overflow
+
+define <2 x i8> @div_factor_signed_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @div_factor_signed_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 3, i8 3>
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i8> [[Y]], <2 x i8> zeroinitializer
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a = mul nsw <2 x i8> %y, %x
+ %r = sdiv <2 x i8> %x, %a
+ ret <2 x i8> %r
+}
+
+; X / (Y * X) -> 1 / Y if the multiplication does not overflow
+
+define i8 @div_factor_unsigned(i8 %x, i8 %y) {
+; CHECK-LABEL: @div_factor_unsigned(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = mul nuw i8 %y, %x
+ %r = udiv i8 %x, %a
+ ret i8 %r
+}
+
+; X / (X * Y) -> 1 / Y if the multiplication does not overflow
+
+define <2 x i8> @div_factor_unsigned_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @div_factor_unsigned_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a = mul nuw <2 x i8> %x, %y
+ %r = udiv <2 x i8> %x, %a
+ ret <2 x i8> %r
+}
+
+define i8 @udiv_common_factor(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @udiv_common_factor(
+; CHECK-NEXT: [[C:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul nuw i8 %z, %x
+ %b = mul nuw i8 %z, %y
+ %c = udiv i8 %a, %b
+ ret i8 %c
+}
+
+define <2 x i8> @udiv_common_factor_commute1_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @udiv_common_factor_commute1_vec(
+; CHECK-NEXT: [[C:%.*]] = udiv <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %a = mul nuw <2 x i8> %x, %z
+ %b = mul nuw <2 x i8> %z, %y
+ %c = udiv <2 x i8> %a, %b
+ ret <2 x i8> %c
+}
+
+define i8 @udiv_common_factor_commute2(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @udiv_common_factor_commute2(
+; CHECK-NEXT: [[C:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul nuw i8 %x, %z
+ %b = mul nuw i8 %y, %z
+ %c = udiv i8 %a, %b
+ ret i8 %c
+}
+
+define i8 @udiv_common_factor_commute3(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @udiv_common_factor_commute3(
+; CHECK-NEXT: [[C:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul nuw i8 %z, %x
+ %b = mul nuw i8 %y, %z
+ %c = udiv i8 %a, %b
+ ret i8 %c
+}
+
+; Negative test: both muls must be 'nuw'.
+
+define i8 @udiv_common_factor_not_nuw(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @udiv_common_factor_not_nuw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Z]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = udiv i8 [[A]], [[B]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %z, %x
+ %b = mul nuw i8 %z, %y
+ %c = udiv i8 %a, %b
+ ret i8 %c
+}
+
+; Negative test: both muls must be 'nuw'.
+
+define <2 x i8> @udiv_common_factor_not_nuw_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @udiv_common_factor_not_nuw_vec(
+; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = mul <2 x i8> [[Z]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = udiv <2 x i8> [[A]], [[B]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %a = mul nuw <2 x i8> %z, %x
+ %b = mul <2 x i8> %z, %y
+ %c = udiv <2 x i8> %a, %b
+ ret <2 x i8> %c
+}
+
+define i32 @test_exact_nsw_exact(i32 %x) {
+; CHECK-LABEL: @test_exact_nsw_exact(
+; CHECK-NEXT: [[NEG:%.*]] = sdiv exact i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv exact i32 %x, 3
+ %neg = sub nsw i32 0, %div
+ ret i32 %neg
+}
+
+define <2 x i64> @test_exact_vec(<2 x i64> %x) {
+; CHECK-LABEL: @test_exact_vec(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv exact <2 x i64> [[X:%.*]], <i64 3, i64 4>
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw <2 x i64> zeroinitializer, [[DIV]]
+; CHECK-NEXT: ret <2 x i64> [[NEG]]
+;
+ %div = sdiv exact <2 x i64> %x, <i64 3, i64 4>
+ %neg = sub nsw <2 x i64> zeroinitializer, %div
+ ret <2 x i64> %neg
+}
+
+; Constant is safe to negate.
+
+define <2 x i8> @negate_sdiv_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_splat(
+; CHECK-NEXT: [[NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 42, i8 42>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Dividing by undef is UB.
+
+define <2 x i8> @negate_sdiv_vec_undef_elt(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_undef_elt(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %div = sdiv <2 x i8> %x, <i8 undef, i8 42>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Division by -1 may be UB (if numerator is the signed min val), but div-by-1 can be simplified.
+
+define <2 x i8> @negate_sdiv_vec_splat_one(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_splat_one(
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 1, i8 1>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Can't negate the signed-min constant, but we can convert to a compare.
+
+define <2 x i8> @negate_sdiv_vec_splat_signed_min(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_splat_signed_min(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 -128, i8 -128>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Division by -1 in any element of a vector may be UB.
+
+define <2 x i8> @negate_sdiv_vec_one_element(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_one_element(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -1, i8 1>
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[DIV]]
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 -1, i8 1>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Can't negate signed-min constant for any element of a vector.
+
+define <2 x i8> @negate_sdiv_vec_signed_min_elt(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_signed_min_elt(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -1, i8 -128>
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[DIV]]
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 -1, i8 -128>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+; Division by -1 may be UB, and the signed-min element can't be negated.
+
+define <2 x i8> @negate_sdiv_vec_signed_min_and_one_elt(<2 x i8> %x) {
+; CHECK-LABEL: @negate_sdiv_vec_signed_min_and_one_elt(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 1, i8 -128>
+; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[DIV]]
+; CHECK-NEXT: ret <2 x i8> [[NEG]]
+;
+ %div = sdiv <2 x i8> %x, <i8 1, i8 -128>
+ %neg = sub <2 x i8> zeroinitializer, %div
+ ret <2 x i8> %neg
+}
+
+define i32 @test_exact_nonsw_exact(i32 %x) {
+; CHECK-LABEL: @test_exact_nonsw_exact(
+; CHECK-NEXT: [[NEG:%.*]] = sdiv exact i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv exact i32 %x, 3
+ %neg = sub i32 0, %div
+ ret i32 %neg
+}
+
+define i32 @test_exact_nsw_noexact(i32 %x) {
+; CHECK-LABEL: @test_exact_nsw_noexact(
+; CHECK-NEXT: [[NEG:%.*]] = sdiv i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv i32 %x, 3
+ %neg = sub nsw i32 0, %div
+ ret i32 %neg
+}
+
+define i32 @test_exact_nonsw_noexact(i32 %x) {
+; CHECK-LABEL: @test_exact_nonsw_noexact(
+; CHECK-NEXT: [[NEG:%.*]] = sdiv i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv i32 %x, 3
+ %neg = sub i32 0, %div
+ ret i32 %neg
+}
+
+define i32 @test_exact_div_nonconst(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_exact_div_nonconst(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv exact i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[DIV]]
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv exact i32 %x, %y
+ %neg = sub nsw i32 0, %div
+ ret i32 %neg
+}
+
+define i32 @test_exact_div_one(i32 %x) {
+; CHECK-LABEL: @test_exact_div_one(
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %div = sdiv exact i32 %x, 1
+ %neg = sub nsw i32 0, %div
+ ret i32 %neg
+}
+
+define i8 @test_exact_div_minSigned(i8 %x) {
+; CHECK-LABEL: @test_exact_div_minSigned(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128
+; CHECK-NEXT: [[NEG:%.*]] = sext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[NEG]]
+;
+ %div = sdiv exact i8 %x, -128
+ %neg = sub nsw i8 0, %div
+ ret i8 %neg
+}
+
+; X / INT_MIN --> X == INT_MIN
+
+define i8 @sdiv_by_int_min(i8 %x) {
+; CHECK-LABEL: @sdiv_by_int_min(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], -128
+; CHECK-NEXT: [[D:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %d = sdiv i8 %x, -128
+ ret i8 %d
+}
+
+define <2 x i8> @sdiv_by_int_min_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_by_int_min_vec_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[D]]
+;
+ %d = sdiv <2 x i8> %x, <i8 -128, i8 -128>
+ ret <2 x i8> %d
+}
+
+define <2 x i8> @sdiv_by_int_min_vec_splat_undef(<2 x i8> %x) {
+; CHECK-LABEL: @sdiv_by_int_min_vec_splat_undef(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %d = sdiv <2 x i8> %x, <i8 -128, i8 undef>
+ ret <2 x i8> %d
+}
diff --git a/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll
new file mode 100644
index 00000000000..e8f7f720b15
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -0,0 +1,574 @@
+; RUN: opt < %s -instcombine -S -mtriple x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK,LINUX,ISC99
+; RUN: opt < %s -instcombine -S -mtriple x86_64-pc-win32 | FileCheck %s --check-prefixes=CHECK,ISC99
+; RUN: opt < %s -instcombine -S -mtriple x86_64-pc-windows-msvc16 | FileCheck %s --check-prefixes=CHECK,MS64,ISC89
+; RUN: opt < %s -instcombine -S -mtriple i386-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,ISC99
+; RUN: opt < %s -instcombine -S -mtriple i686-pc-windows-msvc17 | FileCheck %s --check-prefixes=CHECK,MS32,ISC89
+
+; Check for and against shrinkage when using the
+; unsafe-fp-math function attribute on a math lib
+; function. This optimization may be overridden by
+; the -enable-double-float-shrink option.
+; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
+
+define float @acos_test1(float %f) {
+; CHECK-LABEL: @acos_test1(
+; LINUX-NEXT: [[ACOSF:%.*]] = call fast float @acosf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[ACOSF]]
+; MS32: [[ACOSF:%.*]] = call fast double @acos(double [[F:%.*]])
+; MS64-NEXT: [[ACOSF:%.*]] = call fast float @acosf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acos(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @acos_test2(float %f) {
+; CHECK-LABEL: @acos_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @acos(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acos(double %conv)
+ ret double %call
+}
+
+define float @acosh_test1(float %f) {
+; CHECK-LABEL: @acosh_test1(
+; ISC99-NEXT: [[ACOSHF:%.*]] = call fast float @acoshf(float [[F:%.*]])
+; ISC99-NEXT: ret float [[ACOSHF]]
+; ISC89: [[ACOSHF:%.*]] = call fast double @acosh(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acosh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @acosh_test2(float %f) {
+; CHECK-LABEL: @acosh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @acosh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @acosh(double %conv)
+ ret double %call
+}
+
+define float @asin_test1(float %f) {
+; CHECK-LABEL: @asin_test1(
+; LINUX-NEXT: [[ASINF:%.*]] = call fast float @asinf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[ASINF]]
+; MS32: [[ASINF:%.*]] = call fast double @asin(double [[F:%.*]])
+; MS64-NEXT: [[ASINF:%.*]] = call fast float @asinf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @asin_test2(float %f) {
+; CHECK-LABEL: @asin_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @asin(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asin(double %conv)
+ ret double %call
+}
+
+define float @asinh_test1(float %f) {
+; CHECK-LABEL: @asinh_test1(
+; ISC99-NEXT: [[ASINHF:%.*]] = call fast float @asinhf(float [[F:%.*]])
+; ISC99-NEXT: ret float [[ASINHF]]
+; ISC89: [[ASINHF:%.*]] = call fast double @asinh(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asinh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @asinh_test2(float %f) {
+; CHECK-LABEL: @asinh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @asinh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @asinh(double %conv)
+ ret double %call
+}
+
+define float @atan_test1(float %f) {
+; CHECK-LABEL: @atan_test1(
+; LINUX-NEXT: [[ATANF:%.*]] = call fast float @atanf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[ATANF]]
+; MS32: [[ATANF:%.*]] = call fast double @atan(double [[F:%.*]])
+; MS64-NEXT: [[ATANF:%.*]] = call fast float @atanf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @atan_test2(float %f) {
+; CHECK-LABEL: @atan_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @atan(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atan(double %conv)
+ ret double %call
+}
+
+define float @atanh_test1(float %f) {
+; CHECK-LABEL: @atanh_test1(
+; ISC99-NEXT: [[ATANHF:%.*]] = call fast float @atanhf(float [[F:%.*]])
+; ISC99-NEXT: ret float [[ATANHF]]
+; ISC89: [[ATANHF:%.*]] = call fast double @atanh(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @atanh_test2(float %f) {
+; CHECK-LABEL: @atanh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @atanh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @atanh(double %conv)
+ ret double %call
+}
+
+define float @cbrt_test1(float %f) {
+; CHECK-LABEL: @cbrt_test1(
+; ISC99-NEXT: [[CBRTF:%.*]] = call fast float @cbrtf(float [[F:%.*]])
+; ISC99-NEXT: ret float [[CBRTF]]
+; ISC89: [[CBRTF:%.*]] = call fast double @cbrt(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @cbrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @cbrt_test2(float %f) {
+; CHECK-LABEL: @cbrt_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @cbrt(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @cbrt(double %conv)
+ ret double %call
+}
+
+define float @exp_test1(float %f) {
+; CHECK-LABEL: @exp_test1(
+; LINUX-NEXT: [[EXPF:%.*]] = call fast float @expf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[EXPF]]
+; MS32: [[EXPF:%.*]] = call fast double @exp(double [[F:%.*]])
+; MS64-NEXT: [[EXPF:%.*]] = call fast float @expf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @exp_test2(float %f) {
+; CHECK-LABEL: @exp_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp(double %conv)
+ ret double %call
+}
+
+define float @expm1_test1(float %f) {
+; CHECK-LABEL: @expm1_test1(
+; ISC99-NEXT: [[EXPM1F:%.*]] = call fast float @expm1f(float [[F:%.*]])
+; ISC99-NEXT: ret float [[EXPM1F]]
+; ISC89: [[EXPM1F:%.*]] = call fast double @expm1(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @expm1(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @expm1_test2(float %f) {
+; CHECK-LABEL: @expm1_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @expm1(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @expm1(double %conv)
+ ret double %call
+}
+
+; exp10f() isn't available on any of these triples, so the call doesn't shrink.
+
+define float @exp10_test1(float %f) {
+; CHECK-LABEL: @exp10_test1(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[CALL]] to float
+; CHECK-NEXT: ret float [[CONV1]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @exp10_test2(float %f) {
+; CHECK-LABEL: @exp10_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp10(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @exp10(double %conv)
+ ret double %call
+}
+
+define float @log_test1(float %f) {
+; CHECK-LABEL: @log_test1(
+; LINUX-NEXT: [[LOGF:%.*]] = call fast float @logf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[LOGF]]
+; MS32: [[LOGF:%.*]] = call fast double @log(double [[F:%.*]])
+; MS64-NEXT: [[LOGF:%.*]] = call fast float @logf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @log_test2(float %f) {
+; CHECK-LABEL: @log_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log(double %conv)
+ ret double %call
+}
+
+define float @log10_test1(float %f) {
+; CHECK-LABEL: @log10_test1(
+; LINUX-NEXT: [[LOG10F:%.*]] = call fast float @log10f(float [[F:%.*]])
+; LINUX-NEXT: ret float [[LOG10F]]
+; MS32: [[LOG10F:%.*]] = call fast double @log10(double [[F:%.*]])
+; MS64-NEXT: [[LOG10F:%.*]] = call fast float @log10f(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @log10_test2(float %f) {
+; CHECK-LABEL: @log10_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log10(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log10(double %conv)
+ ret double %call
+}
+
+define float @log1p_test1(float %f) {
+; CHECK-LABEL: @log1p_test1(
+; ISC99-NEXT: [[LOG1PF:%.*]] = call fast float @log1pf(float [[F:%.*]])
+; ISC99-NEXT: ret float [[LOG1PF]]
+; ISC89: [[LOG1PF:%.*]] = call fast double @log1p(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log1p(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @log1p_test2(float %f) {
+; CHECK-LABEL: @log1p_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log1p(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log1p(double %conv)
+ ret double %call
+}
+
+define float @log2_test1(float %f) {
+; CHECK-LABEL: @log2_test1(
+; ISC99-NEXT: [[LOG2F:%.*]] = call fast float @log2f(float [[F:%.*]])
+; ISC99-NEXT: ret float [[LOG2F]]
+; ISC89: [[LOG2F:%.*]] = call fast double @log2(double [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log2(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @log2_test2(float %f) {
+; CHECK-LABEL: @log2_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @log2(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @log2(double %conv)
+ ret double %call
+}
+
+define float @logb_test1(float %f) {
+; CHECK-LABEL: @logb_test1(
+; LINUX-NEXT: [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[LOGBF]]
+; MS32: [[LOGBF:%.*]] = call fast double @logb(double [[F:%.*]])
+; MS64-NEXT: [[LOGBF:%.*]] = call fast float @logbf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @logb(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @logb_test2(float %f) {
+; CHECK-LABEL: @logb_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @logb(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @logb(double %conv)
+ ret double %call
+}
+
+define float @pow_test1(float %f, float %g) {
+; CHECK-LABEL: @pow_test1(
+; LINUX-NEXT: [[POWF:%.*]] = call fast float @powf(float %f, float %g)
+; LINUX-NEXT: ret float [[POWF]]
+; MS32: [[POWF:%.*]] = call fast double @pow(double %df, double %dg)
+; MS64-NEXT: [[POWF:%.*]] = call fast float @powf(float %f, float %g)
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @pow(double %df, double %dg)
+ %fr = fptrunc double %call to float
+ ret float %fr
+}
+
+define double @pow_test2(float %f, float %g) {
+; CHECK-LABEL: @pow_test2(
+; CHECK: [[POW:%.*]] = call fast double @pow(double %df, double %dg)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @pow(double %df, double %dg)
+ ret double %call
+}
+
+define float @sin_test1(float %f) {
+; CHECK-LABEL: @sin_test1(
+; LINUX-NEXT: [[SINF:%.*]] = call fast float @sinf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[SINF]]
+; MS32: [[SINF:%.*]] = call fast double @sin(double [[F:%.*]])
+; MS64-NEXT: [[SINF:%.*]] = call fast float @sinf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @sin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @sin_test2(float %f) {
+; CHECK-LABEL: @sin_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @sin(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @sin(double %conv)
+ ret double %call
+}
+
+define float @sqrt_test1(float %f) {
+; CHECK-LABEL: @sqrt_test1(
+; LINUX-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[SQRTF]]
+; MS32: [[SQRTF:%.*]] = call double @sqrt(double [[F:%.*]])
+; MS64-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @sqrt_test2(float %f) {
+; CHECK-LABEL: @sqrt_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call double @sqrt(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
+}
+
+define float @sqrt_int_test1(float %f) {
+; CHECK-LABEL: @sqrt_int_test1(
+; LINUX-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[F:%.*]])
+; LINUX-NEXT: ret float [[TMP1]]
+; MS32: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]])
+; MS64-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call double @llvm.sqrt.f64(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @sqrt_int_test2(float %f) {
+; CHECK-LABEL: @sqrt_int_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sqrt.f64(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call double @llvm.sqrt.f64(double %conv)
+ ret double %call
+}
+
+define float @tan_test1(float %f) {
+; CHECK-LABEL: @tan_test1(
+; LINUX-NEXT: [[TANF:%.*]] = call fast float @tanf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[TANF]]
+; MS32: [[TANF:%.*]] = call fast double @tan(double [[F:%.*]])
+; MS64-NEXT: [[TANF:%.*]] = call fast float @tanf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @tan_test2(float %f) {
+; CHECK-LABEL: @tan_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @tan(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tan(double %conv)
+ ret double %call
+}
+define float @tanh_test1(float %f) {
+; CHECK-LABEL: @tanh_test1(
+; LINUX-NEXT: [[TANHF:%.*]] = call fast float @tanhf(float [[F:%.*]])
+; LINUX-NEXT: ret float [[TANHF]]
+; MS32: [[TANHF:%.*]] = call fast double @tanh(double [[F:%.*]])
+; MS64-NEXT: [[TANHF:%.*]] = call fast float @tanhf(float [[F:%.*]])
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+define double @tanh_test2(float %f) {
+; CHECK-LABEL: @tanh_test2(
+; CHECK-NEXT: [[CONV:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @tanh(double [[CONV]])
+; CHECK-NEXT: ret double [[CALL]]
+;
+ %conv = fpext float %f to double
+ %call = call fast double @tanh(double %conv)
+ ret double %call
+}
+
+; 'arcp' on an fmax() is meaningless. This test just proves that
+; flags are propagated for shrunken *binary* double FP calls.
+define float @max1(float %a, float %b) {
+; CHECK-LABEL: @max1(
+; ISC99-NEXT: [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]])
+; ISC99-NEXT: ret float [[FMAXF]]
+; ISC89: [[FMAXF:%.*]] = call arcp double @fmax(double [[A:%.*]], double [[B:%.*]])
+;
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call arcp double @fmax(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+}
+
+; A function can have a name that matches a common libcall,
+; but with the wrong type(s). Let it be.
+
+define float @fake_fmin(float %a, float %b) {
+; CHECK-LABEL: @fake_fmin(
+; CHECK-NEXT: [[C:%.*]] = fpext float [[A:%.*]] to fp128
+; CHECK-NEXT: [[D:%.*]] = fpext float [[B:%.*]] to fp128
+; CHECK-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc fp128 [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %c = fpext float %a to fp128
+ %d = fpext float %b to fp128
+ %e = call fp128 @fmin(fp128 %c, fp128 %d)
+ %f = fptrunc fp128 %e to float
+ ret float %f
+}
+
+declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+
+declare double @fmax(double, double)
+
+declare double @tanh(double)
+declare double @tan(double)
+
+; sqrt is a special case: the shrinking optimization
+; is valid even without unsafe-fp-math.
+declare double @sqrt(double)
+declare double @llvm.sqrt.f64(double)
+
+declare double @sin(double)
+declare double @pow(double, double)
+declare double @log2(double)
+declare double @log1p(double)
+declare double @log10(double)
+declare double @log(double)
+declare double @logb(double)
+declare double @exp10(double)
+declare double @expm1(double)
+declare double @exp(double)
+declare double @cbrt(double)
+declare double @atanh(double)
+declare double @atan(double)
+declare double @acos(double)
+declare double @acosh(double)
+declare double @asin(double)
+declare double @asinh(double)
+
diff --git a/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll b/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll
new file mode 100644
index 00000000000..76e497bd68f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -0,0 +1,654 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" -enable-debugify 2>&1 | FileCheck --check-prefix=DBG-VALID %s
+
+declare double @floor(double)
+declare double @ceil(double)
+declare double @round(double)
+declare double @nearbyint(double)
+declare double @trunc(double)
+declare double @fabs(double)
+
+declare double @llvm.ceil.f64(double)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+
+declare double @llvm.fabs.f64(double)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+declare double @llvm.floor.f64(double)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+
+declare double @llvm.nearbyint.f64(double)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+
+declare float @llvm.rint.f32(float)
+declare <2 x float> @llvm.rint.v2f32(<2 x float>)
+
+declare double @llvm.round.f64(double)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+
+declare double @llvm.trunc.f64(double)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+
+define float @test_shrink_libcall_floor(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_floor(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.floor.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ ; --> floorf
+ %E = call double @floor(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_libcall_ceil(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_ceil(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.ceil.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ ; --> ceilf
+ %E = call double @ceil(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_libcall_round(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_round(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.round.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ ; --> roundf
+ %E = call double @round(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_libcall_nearbyint(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_nearbyint(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.nearbyint.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ ; --> nearbyintf
+ %E = call double @nearbyint(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_libcall_trunc(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_trunc(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.trunc.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ ; --> truncf
+ %E = call double @trunc(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; This is replaced with the intrinsic, which does the right thing on
+; all of the checked platforms.
+define float @test_shrink_libcall_fabs(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_fabs(
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.fabs.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ %E = call double @fabs(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; Make sure fast math flags are preserved
+define float @test_shrink_libcall_fabs_fast(float %C) {
+; CHECK-LABEL: @test_shrink_libcall_fabs_fast(
+; CHECK-NEXT: [[F:%.*]] = call fast float @llvm.fabs.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext float %C to double
+ %E = call fast double @fabs(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_ceil(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_ceil(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.ceil.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.ceil.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_fabs(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_fabs(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_floor(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_floor(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.floor.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.floor.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_nearbyint(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_nearbyint(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.nearbyint.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.nearbyint.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define half @test_shrink_intrin_rint(half %C) {
+; CHECK-LABEL: @test_shrink_intrin_rint(
+; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.rint.f16(half [[C:%.*]])
+; CHECK-NEXT: ret half [[TMP1]]
+;
+ %D = fpext half %C to float
+ %E = call float @llvm.rint.f32(float %D)
+ %F = fptrunc float %E to half
+ ret half %F
+}
+
+define float @test_shrink_intrin_round(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_round(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.round.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.round.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_trunc(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_trunc(
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.trunc.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call double @llvm.trunc.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+declare void @use_v2f64(<2 x double>)
+declare void @use_v2f32(<2 x float>)
+
+define <2 x float> @test_shrink_intrin_ceil_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_ceil_multi_use(
+; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double>
+; CHECK-NEXT: [[E:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]])
+; CHECK-NEXT: ret <2 x float> [[F]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.ceil.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %D)
+ ret <2 x float> %F
+}
+
+define <2 x float> @test_shrink_intrin_fabs_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_multi_use(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[C:%.*]])
+; CHECK-NEXT: [[E:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[E]])
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.fabs.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %E)
+ ret <2 x float> %F
+}
+
+define <2 x float> @test_shrink_intrin_floor_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_floor_multi_use(
+; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double>
+; CHECK-NEXT: [[E:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]])
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[E]])
+; CHECK-NEXT: ret <2 x float> [[F]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.floor.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %D)
+ call void @use_v2f64(<2 x double> %E)
+ ret <2 x float> %F
+}
+
+define <2 x float> @test_shrink_intrin_nearbyint_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_nearbyint_multi_use(
+; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double>
+; CHECK-NEXT: [[E:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]])
+; CHECK-NEXT: ret <2 x float> [[F]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %D)
+ ret <2 x float> %F
+}
+
+define <2 x half> @test_shrink_intrin_rint_multi_use(<2 x half> %C) {
+; CHECK-LABEL: @test_shrink_intrin_rint_multi_use(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.rint.v2f16(<2 x half> [[C:%.*]])
+; CHECK-NEXT: [[E:%.*]] = fpext <2 x half> [[TMP1]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f32(<2 x float> [[E]])
+; CHECK-NEXT: ret <2 x half> [[TMP1]]
+;
+ %D = fpext <2 x half> %C to <2 x float>
+ %E = call <2 x float> @llvm.rint.v2f32(<2 x float> %D)
+ %F = fptrunc <2 x float> %E to <2 x half>
+ call void @use_v2f32(<2 x float> %E)
+ ret <2 x half> %F
+}
+
+define <2 x float> @test_shrink_intrin_round_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_round_multi_use(
+; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double>
+; CHECK-NEXT: [[E:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]])
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[E]])
+; CHECK-NEXT: ret <2 x float> [[F]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.round.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %D)
+ call void @use_v2f64(<2 x double> %E)
+ ret <2 x float> %F
+}
+
+define <2 x float> @test_shrink_intrin_trunc_multi_use(<2 x float> %C) {
+; CHECK-LABEL: @test_shrink_intrin_trunc_multi_use(
+; CHECK-NEXT: [[D:%.*]] = fpext <2 x float> [[C:%.*]] to <2 x double>
+; CHECK-NEXT: [[E:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc <2 x double> [[E]] to <2 x float>
+; CHECK-NEXT: call void @use_v2f64(<2 x double> [[D]])
+; CHECK-NEXT: ret <2 x float> [[F]]
+;
+ %D = fpext <2 x float> %C to <2 x double>
+ %E = call <2 x double> @llvm.trunc.v2f64(<2 x double> %D)
+ %F = fptrunc <2 x double> %E to <2 x float>
+ call void @use_v2f64(<2 x double> %D)
+ ret <2 x float> %F
+}
+
+; Make sure fast math flags are preserved
+define float @test_shrink_intrin_fabs_fast(float %C) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[C:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %D = fpext float %C to double
+ %E = call fast double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_floor(double %D) {
+; CHECK-LABEL: @test_no_shrink_intrin_floor(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.floor.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.floor.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_ceil(double %D) {
+; CHECK-LABEL: @test_no_shrink_intrin_ceil(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.ceil.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.ceil.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_round(double %D) {
+; CHECK-LABEL: @test_no_shrink_intrin_round(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.round.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.round.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_nearbyint(double %D) {
+; CHECK-LABEL: @test_no_shrink_intrin_nearbyint(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.nearbyint.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.nearbyint.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_trunc(double %D) {
+; CHECK-LABEL: @test_no_shrink_intrin_trunc(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.trunc.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.trunc.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_intrin_fabs_double_src(double %D) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_double_src(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[D:%.*]] to float
+; CHECK-NEXT: [[F:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; Make sure fast math flags are preserved
+define float @test_shrink_intrin_fabs_fast_double_src(double %D) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_fast_double_src(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[D:%.*]] to float
+; CHECK-NEXT: [[F:%.*]] = call fast float @llvm.fabs.f32(float [[TMP1]])
+; CHECK-NEXT: ret float [[F]]
+;
+ %E = call fast double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_floor() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_floor(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %E = call double @llvm.floor.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_ceil() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_ceil(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %E = call double @llvm.ceil.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_round() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_round(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %E = call double @llvm.round.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_nearbyint() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_nearbyint(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %E = call double @llvm.nearbyint.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_trunc() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_trunc(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %E = call double @llvm.trunc.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_shrink_float_convertible_constant_intrin_fabs() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_fabs(
+; CHECK-NEXT: ret float 0x4000CCCCC0000000
+;
+ %E = call double @llvm.fabs.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; Make sure fast math flags are preserved
+define float @test_shrink_float_convertible_constant_intrin_fabs_fast() {
+; CHECK-LABEL: @test_shrink_float_convertible_constant_intrin_fabs_fast(
+; CHECK-NEXT: ret float 0x4000CCCCC0000000
+;
+ %E = call fast double @llvm.fabs.f64(double 2.1)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define half @test_no_shrink_mismatched_type_intrin_floor(double %D) {
+; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_floor(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.floor.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.floor.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define half @test_no_shrink_mismatched_type_intrin_ceil(double %D) {
+; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_ceil(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.ceil.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.ceil.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define half @test_no_shrink_mismatched_type_intrin_round(double %D) {
+; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_round(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.round.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.round.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define half @test_no_shrink_mismatched_type_intrin_nearbyint(double %D) {
+; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_nearbyint(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.nearbyint.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.nearbyint.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define half @test_no_shrink_mismatched_type_intrin_trunc(double %D) {
+; CHECK-LABEL: @test_no_shrink_mismatched_type_intrin_trunc(
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.trunc.f64(double [[D:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.trunc.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define half @test_shrink_mismatched_type_intrin_fabs_double_src(double %D) {
+; CHECK-LABEL: @test_shrink_mismatched_type_intrin_fabs_double_src(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[D:%.*]] to half
+; CHECK-NEXT: [[F:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+; Make sure fast math flags are preserved
+define half @test_mismatched_type_intrin_fabs_fast_double_src(double %D) {
+; CHECK-LABEL: @test_mismatched_type_intrin_fabs_fast_double_src(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[D:%.*]] to half
+; CHECK-NEXT: [[F:%.*]] = call fast half @llvm.fabs.f16(half [[TMP1]])
+; CHECK-NEXT: ret half [[F]]
+;
+ %E = call fast double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to half
+ ret half %F
+}
+
+define <2 x double> @test_shrink_intrin_floor_fp16_vec(<2 x half> %C) {
+; CHECK-LABEL: @test_shrink_intrin_floor_fp16_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call arcp <2 x half> @llvm.floor.v2f16(<2 x half> [[C:%.*]])
+; CHECK-NEXT: [[E:%.*]] = fpext <2 x half> [[TMP1]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[E]]
+;
+ %D = fpext <2 x half> %C to <2 x double>
+ %E = call arcp <2 x double> @llvm.floor.v2f64(<2 x double> %D)
+ ret <2 x double> %E
+}
+
+define float @test_shrink_intrin_ceil_fp16_src(half %C) {
+; CHECK-LABEL: @test_shrink_intrin_ceil_fp16_src(
+; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.ceil.f16(half [[C:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fpext half [[TMP1]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ %E = call double @llvm.ceil.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define <2 x double> @test_shrink_intrin_round_fp16_vec(<2 x half> %C) {
+; CHECK-LABEL: @test_shrink_intrin_round_fp16_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.round.v2f16(<2 x half> [[C:%.*]])
+; CHECK-NEXT: [[E:%.*]] = fpext <2 x half> [[TMP1]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[E]]
+;
+ %D = fpext <2 x half> %C to <2 x double>
+ %E = call <2 x double> @llvm.round.v2f64(<2 x double> %D)
+ ret <2 x double> %E
+}
+
+define float @test_shrink_intrin_nearbyint_fp16_src(half %C) {
+; CHECK-LABEL: @test_shrink_intrin_nearbyint_fp16_src(
+; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.nearbyint.f16(half [[C:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fpext half [[TMP1]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ %E = call double @llvm.nearbyint.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define <2 x double> @test_shrink_intrin_trunc_fp16_src(<2 x half> %C) {
+; CHECK-LABEL: @test_shrink_intrin_trunc_fp16_src(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x half> @llvm.trunc.v2f16(<2 x half> [[C:%.*]])
+; CHECK-NEXT: [[E:%.*]] = fpext <2 x half> [[TMP1]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[E]]
+;
+ %D = fpext <2 x half> %C to <2 x double>
+ %E = call <2 x double> @llvm.trunc.v2f64(<2 x double> %D)
+ ret <2 x double> %E
+}
+
+define float @test_shrink_intrin_fabs_fp16_src(half %C) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_fp16_src(
+; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.fabs.f16(half [[C:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fpext half [[TMP1]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ %E = call double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; Make sure fast math flags are preserved
+define float @test_shrink_intrin_fabs_fast_fp16_src(half %C) {
+; CHECK-LABEL: @test_shrink_intrin_fabs_fast_fp16_src(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.fabs.f16(half [[C:%.*]])
+; CHECK-NEXT: [[F:%.*]] = fpext half [[TMP1]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ %E = call fast double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_floor_multi_use_fpext(half %C) {
+; CHECK-LABEL: @test_no_shrink_intrin_floor_multi_use_fpext(
+; CHECK-NEXT: [[D:%.*]] = fpext half [[C:%.*]] to double
+; CHECK-NEXT: store volatile double [[D]], double* undef, align 8
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.floor.f64(double [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ store volatile double %D, double* undef
+ %E = call double @llvm.floor.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_no_shrink_intrin_fabs_multi_use_fpext(half %C) {
+; CHECK-LABEL: @test_no_shrink_intrin_fabs_multi_use_fpext(
+; CHECK-NEXT: [[D:%.*]] = fpext half [[C:%.*]] to double
+; CHECK-NEXT: store volatile double [[D]], double* undef, align 8
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.fabs.f64(double [[D]])
+; CHECK-NEXT: [[F:%.*]] = fptrunc double [[E]] to float
+; CHECK-NEXT: ret float [[F]]
+;
+ %D = fpext half %C to double
+ store volatile double %D, double* undef
+ %E = call double @llvm.fabs.f64(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+; DBG-VALID: CheckModuleDebugify: PASS
diff --git a/llvm/test/Transforms/InstCombine/early_constfold_changes_IR.ll b/llvm/test/Transforms/InstCombine/early_constfold_changes_IR.ll
new file mode 100644
index 00000000000..18b21923480
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/early_constfold_changes_IR.ll
@@ -0,0 +1,20 @@
+; This run line verifies that we get the expected constant fold.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This run line verifies that InstructionCombiningPass::runOnFunction reports
+; this as a modification of the IR.
+; RUN: opt < %s -instcombine -disable-output -debug-pass=Details 2>&1 | FileCheck %s --check-prefix=DETAILS
+
+define i32 @foo(i32 %arg) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 7
+; CHECK-NEXT: ret i32 [[AND]]
+;
+entry:
+ %or = or i32 0, 7
+ %and = and i32 %arg, %or
+ ret i32 %and
+}
+
+; DETAILS: Made Modification 'Combine redundant instructions' on Function 'foo'
diff --git a/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll b/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll
new file mode 100644
index 00000000000..743477621fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; This test case exposed a bug in instcombine where the early
+; DCE of a call wasn't recognized as changing the IR.
+; So when runOnFunction propagated its "made changes" status upwards
+; to the CallGraphSCCPass, it signalled that no changes had been
+; made, and CallGraphSCCPass assumed that the old CallGraph,
+; as known by that pass manager, was still up-to-date.
+;
+; This was detected as an assert when trying to remove the
+; no-longer-used function 'bar' (due to an incorrect reference
+; count in the CallGraph).
+
+attributes #0 = { noinline norecurse nounwind readnone }
+
+define void @foo() #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = call i32 @bar()
+ ret void
+}
+
+define internal i32 @bar() #0 {
+; CHECK-NOT: bar
+entry:
+ ret i32 42
+}
+
diff --git a/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll b/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll
new file mode 100644
index 00000000000..6bc62c94e32
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll
@@ -0,0 +1,418 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; ---- memset -----
+
+; Ensure 0-length memset is removed
+define void @test_memset_zero_length(i8* %dest) {
+; CHECK-LABEL: @test_memset_zero_length(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
+ ret void
+}
+
+define void @test_memset_to_store(i8* %dest) {
+; CHECK-LABEL: @test_memset_to_store(
+; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 2, i32 1)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 4, i32 1)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 8, i32 1)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 16, i32 1)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 16, i32 1)
+ ret void
+}
+
+define void @test_memset_to_store_2(i8* %dest) {
+; CHECK-LABEL: @test_memset_to_store_2(
+; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 2
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 2
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 8, i32 2)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 16, i32 2)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 16, i32 2)
+ ret void
+}
+
+define void @test_memset_to_store_4(i8* %dest) {
+; CHECK-LABEL: @test_memset_to_store_4(
+; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 8, i32 4)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 16, i32 4)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 2, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4)
+ ret void
+}
+
+define void @test_memset_to_store_8(i8* %dest) {
+; CHECK-LABEL: @test_memset_to_store_8(
+; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 8
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 [[DEST]], i8 1, i32 16, i32 8)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 2, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 4, i32 4)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8)
+ ret void
+}
+
+define void @test_memset_to_store_16(i8* %dest) {
+; CHECK-LABEL: @test_memset_to_store_16(
+; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 16
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 16
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 16
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 [[DEST]], i8 1, i32 16, i32 16)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 2, i32 2)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 4, i32 4)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 8, i32 8)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16)
+ ret void
+}
+
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly
+
+
+;; =========================================
+;; ----- memmove ------
+
+
+@gconst = constant [32 x i8] c"0123456789012345678901234567890\00"
+; Check that a memmove from a global constant is converted into a memcpy
+define void @test_memmove_to_memcpy(i8* %dest) {
+; CHECK-LABEL: @test_memmove_to_memcpy(
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST:%.*]], i8* align 16 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
+ ret void
+}
+
+define void @test_memmove_zero_length(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_zero_length(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+ ret void
+}
+
+; memmove with src==dest is removed
+define void @test_memmove_removed(i8* %srcdest, i32 %sz) {
+; CHECK-LABEL: @test_memmove_removed(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
+ ret void
+}
+
+; memmove with a small constant length is converted to a load/store pair
+define void @test_memmove_loadstore(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_loadstore(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 2, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 4, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 8, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
+ ret void
+}
+
+define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_loadstore_2(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 8, i32 2)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
+ ret void
+}
+
+define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_loadstore_4(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 8, i32 4)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
+ ret void
+}
+
+define void @test_memmove_loadstore_8(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_loadstore_8(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
+; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
+ ret void
+}
+
+define void @test_memmove_loadstore_16(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memmove_loadstore_16(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
+; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
+ ret void
+}
+
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly
+
+;; =========================================
+;; ----- memcpy ------
+
+define void @test_memcpy_zero_length(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_zero_length(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+ ret void
+}
+
+; memcpy with src==dest is removed
+define void @test_memcpy_removed(i8* %srcdest, i32 %sz) {
+; CHECK-LABEL: @test_memcpy_removed(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
+ ret void
+}
+
+; memcpy with a small constant length is converted to a load/store pair
+define void @test_memcpy_loadstore(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_loadstore(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 2, i32 1)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 4, i32 1)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 8, i32 1)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
+ ret void
+}
+
+define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_loadstore_2(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 8, i32 2)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
+ ret void
+}
+
+define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_loadstore_4(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 8, i32 4)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
+ ret void
+}
+
+define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_loadstore_8(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
+; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
+ ret void
+}
+
+define void @test_memcpy_loadstore_16(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test_memcpy_loadstore_16(
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
+; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
+; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
+; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
+; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
+; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
+ ret void
+}
+
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly
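
The single-store lowerings checked above all rely on the same splat arithmetic: the memset fill byte is replicated across the width of the unordered atomic store, so a fill byte of 1 becomes 257 (0x0101) as i16, 16843009 (0x01010101) as i32, and 72340172838076673 (0x0101010101010101) as i64. A minimal standalone sketch of the expected single-store form, assuming the same typed-pointer IR and intrinsic shapes as the tests above:

define void @memset_len4_elemsize4_sketch(i8* %dest) {
  ; len == 4 and element size == 4, so the whole memset collapses into one
  ; unordered atomic i32 store of the splatted fill byte (0x01010101).
  %p = bitcast i8* %dest to i32*
  store atomic i32 16843009, i32* %p unordered, align 4
  ret void
}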
diff --git a/llvm/test/Transforms/InstCombine/enforce-known-alignment.ll b/llvm/test/Transforms/InstCombine/enforce-known-alignment.ll
new file mode 100644
index 00000000000..323a7ec2998
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/enforce-known-alignment.ll
@@ -0,0 +1,38 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define void @foo(i32) {
+; CHECK-LABEL: @foo(
+; CHECK: alloca
+; CHECK: align 16
+ %2 = alloca [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], align 16 ; <[3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]*> [#uses=1]
+ %3 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>]* %2, i32 0, i32 0 ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
+ %4 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>, <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>* %3, i32 0, i32 0 ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
+ %5 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }, { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }* %4, i32 0, i32 0 ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 }*> [#uses=1]
+ %6 = bitcast { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 }* %5 to { [8 x i16] }* ; <{ [8 x i16] }*> [#uses=1]
+ %7 = getelementptr { [8 x i16] }, { [8 x i16] }* %6, i32 0, i32 0 ; <[8 x i16]*> [#uses=1]
+ %8 = getelementptr [8 x i16], [8 x i16]* %7, i32 0, i32 0 ; <i16*> [#uses=1]
+ store i16 0, i16* %8, align 16
+ call void @bar(i16* %8)
+ ret void
+}
+
+declare void @bar(i16*)
+
+define void @foo_as1(i32 %a, [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b) {
+; CHECK-LABEL: @foo_as1(
+; CHECK: align 16
+ %1 = getelementptr [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>], [3 x <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>] addrspace(1)* %b, i32 0, i32 0 ; <<{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>*> [#uses=1]
+ %2 = getelementptr <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }>, <{ { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } }> addrspace(1)* %1, i32 0, i32 0 ; <{ { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }*> [#uses=1]
+ %3 = getelementptr { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } }, { { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } } addrspace(1)* %2, i32 0, i32 0 ; <{ [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 }*> [#uses=1]
+ %4 = bitcast { [2 x { { i32 } }], [2 x i8], { i16 }, [2 x i8], i8, i8 } addrspace(1)* %3 to { [8 x i16] } addrspace(1)* ; <{ [8 x i16] }*> [#uses=1]
+ %5 = getelementptr { [8 x i16] }, { [8 x i16] } addrspace(1)* %4, i32 0, i32 0 ; <[8 x i16]*> [#uses=1]
+ %6 = getelementptr [8 x i16], [8 x i16] addrspace(1)* %5, i32 0, i32 0 ; <i16*> [#uses=1]
+ store i16 0, i16 addrspace(1)* %6, align 16
+ call void @bar_as1(i16 addrspace(1)* %6)
+ ret void
+}
+
+declare void @bar_as1(i16 addrspace(1)*)
diff --git a/llvm/test/Transforms/InstCombine/err-rep-cold.ll b/llvm/test/Transforms/InstCombine/err-rep-cold.ll
new file mode 100644
index 00000000000..763a5752e73
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/err-rep-cold.ll
@@ -0,0 +1,77 @@
+; Test the static branch probability heuristics for error-reporting functions.
+; RUN: opt < %s -instcombine -S | FileCheck -enable-var-scope %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@stdout = external global %struct._IO_FILE*
+@stderr = external global %struct._IO_FILE*
+@.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1
+entry:
+ %cmp = icmp sgt i32 %a, 8
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
+ %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
+ br label %return
+
+; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[$AT1:[0-9]+]]
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2
+entry:
+ %cmp = icmp sgt i32 %a, 8
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
+ %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+ br label %return
+
+; CHECK: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[$AT2:[0-9]+]]
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3
+entry:
+ %cmp = icmp sgt i32 %a, 8
+ br i1 %cmp, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
+ %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
+ br label %return
+
+; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[$AT2]]
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+; CHECK: attributes #[[$AT1]] = { cold nounwind }
+; CHECK: attributes #[[$AT2]] = { cold }
+
diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll
new file mode 100644
index 00000000000..96b6fd68996
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/exact.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @sdiv1(i32 %x) {
+; CHECK-LABEL: @sdiv1(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 %x, 8
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = sdiv i32 %x, 8
+ ret i32 %y
+}
+
+define i32 @sdiv2(i32 %x) {
+; CHECK-LABEL: @sdiv2(
+; CHECK-NEXT: [[Y:%.*]] = ashr exact i32 %x, 3
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = sdiv exact i32 %x, 8
+ ret i32 %y
+}
+
+define <2 x i32> @sdiv2_vec(<2 x i32> %x) {
+; CHECK-LABEL: @sdiv2_vec(
+; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> %x, <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[Y]]
+;
+ %y = sdiv exact <2 x i32> %x, <i32 128, i32 128>
+ ret <2 x i32> %y
+}
+
+define i32 @sdiv3(i32 %x) {
+; CHECK-LABEL: @sdiv3(
+; CHECK-NEXT: [[Y:%.*]] = srem i32 %x, 3
+; CHECK-NEXT: [[Z:%.*]] = sub i32 %x, [[Y]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = sdiv i32 %x, 3
+ %z = mul i32 %y, 3
+ ret i32 %z
+}
+
+define i32 @sdiv4(i32 %x) {
+; CHECK-LABEL: @sdiv4(
+; CHECK-NEXT: ret i32 %x
+;
+ %y = sdiv exact i32 %x, 3
+ %z = mul i32 %y, 3
+ ret i32 %z
+}
+
+define i32 @sdiv5(i32 %x) {
+; CHECK-LABEL: @sdiv5(
+; CHECK-NEXT: [[Y:%.*]] = srem i32 %x, 3
+; CHECK-NEXT: [[Z:%.*]] = sub i32 [[Y]], %x
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = sdiv i32 %x, 3
+ %z = mul i32 %y, -3
+ ret i32 %z
+}
+
+define i32 @sdiv6(i32 %x) {
+; CHECK-LABEL: @sdiv6(
+; CHECK-NEXT: [[Z:%.*]] = sub i32 0, %x
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = sdiv exact i32 %x, 3
+ %z = mul i32 %y, -3
+ ret i32 %z
+}
+
+define i32 @udiv1(i32 %x, i32 %w) {
+; CHECK-LABEL: @udiv1(
+; CHECK-NEXT: ret i32 %x
+;
+ %y = udiv exact i32 %x, %w
+ %z = mul i32 %y, %w
+ ret i32 %z
+}
+
+define i32 @udiv2(i32 %x, i32 %w) {
+; CHECK-LABEL: @udiv2(
+; CHECK-NEXT: [[Z:%.*]] = lshr exact i32 %x, %w
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = shl i32 1, %w
+ %z = udiv exact i32 %x, %y
+ ret i32 %z
+}
+
+define i64 @ashr1(i64 %X) {
+; CHECK-LABEL: @ashr1(
+; CHECK-NEXT: [[A:%.*]] = shl i64 %X, 8
+; CHECK-NEXT: [[B:%.*]] = ashr exact i64 [[A]], 2
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = shl i64 %X, 8
+ %B = ashr i64 %A, 2
+ ret i64 %B
+}
+
+; The vector ashr should be exact (like it is in the preceding test).
+
+define <2 x i64> @ashr1_vec(<2 x i64> %X) {
+; CHECK-LABEL: @ashr1_vec(
+; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> %X, <i64 8, i64 8>
+; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 2, i64 2>
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %A = shl <2 x i64> %X, <i64 8, i64 8>
+ %B = ashr <2 x i64> %A, <i64 2, i64 2>
+ ret <2 x i64> %B
+}
+
+; PR9120
+define i1 @ashr_icmp1(i64 %X) {
+; CHECK-LABEL: @ashr_icmp1(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = ashr exact i64 %X, 2 ; X/4
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define i1 @ashr_icmp2(i64 %X) {
+; CHECK-LABEL: @ashr_icmp2(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i64 %X, 16
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %Y = ashr exact i64 %X, 2 ; x / 4
+ %Z = icmp slt i64 %Y, 4 ; x < 16
+ ret i1 %Z
+}
+
+define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) {
+; CHECK-LABEL: @ashr_icmp2_vec(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i64> %X, <i64 16, i64 16>
+; CHECK-NEXT: ret <2 x i1> [[Z]]
+;
+ %Y = ashr exact <2 x i64> %X, <i64 2, i64 2>
+ %Z = icmp slt <2 x i64> %Y, <i64 4, i64 4>
+ ret <2 x i1> %Z
+}
+
+; PR9998
+; Make sure we don't transform the ashr here into an sdiv
+define i1 @pr9998(i32 %V) {
+; CHECK-LABEL: @pr9998(
+; CHECK-NEXT: [[W_MASK:%.*]] = and i32 %V, 1
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[W_MASK]], 0
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %W = shl i32 %V, 31
+ %X = ashr exact i32 %W, 31
+ %Y = sext i32 %X to i64
+ %Z = icmp ugt i64 %Y, 7297771788697658747
+ ret i1 %Z
+}
+
+; FIXME: Vectors should fold the same way.
+define <2 x i1> @pr9998vec(<2 x i32> %V) {
+; CHECK-LABEL: @pr9998vec(
+; CHECK-NEXT: [[W:%.*]] = shl <2 x i32> %V, <i32 31, i32 31>
+; CHECK-NEXT: [[X:%.*]] = ashr exact <2 x i32> [[W]], <i32 31, i32 31>
+; CHECK-NEXT: [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64>
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], <i64 7297771788697658747, i64 7297771788697658747>
+; CHECK-NEXT: ret <2 x i1> [[Z]]
+;
+ %W = shl <2 x i32> %V, <i32 31, i32 31>
+ %X = ashr exact <2 x i32> %W, <i32 31, i32 31>
+ %Y = sext <2 x i32> %X to <2 x i64>
+ %Z = icmp ugt <2 x i64> %Y, <i64 7297771788697658747, i64 7297771788697658747>
+ ret <2 x i1> %Z
+}
+
+define i1 @udiv_icmp1(i64 %X) {
+; CHECK-LABEL: @udiv_icmp1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = udiv exact i64 %X, 5 ; X/5
+ %B = icmp ne i64 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @udiv_icmp1_vec(<2 x i64> %X) {
+; CHECK-LABEL: @udiv_icmp1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> %X, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
+ %B = icmp ne <2 x i64> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @udiv_icmp2(i64 %X) {
+; CHECK-LABEL: @udiv_icmp2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = udiv exact i64 %X, 5 ; X/5 == 0 --> x == 0
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @udiv_icmp2_vec(<2 x i64> %X) {
+; CHECK-LABEL: @udiv_icmp2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
+ %B = icmp eq <2 x i64> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp1(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, 5 ; X/5 == 0 --> x == 0
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp1_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
+ %B = icmp eq <2 x i64> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp2(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, 5 ; X/5 == 1 --> x == 5
+ %B = icmp eq i64 %A, 1
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
+ %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp3(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, -5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, 5 ; X/5 == -1 --> x == -5
+ %B = icmp eq i64 %A, -1
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp3_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
+ %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp4(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp4(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, -5 ; X/-5 == 0 --> x == 0
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp4_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp4_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
+ %B = icmp eq <2 x i64> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp5(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp5(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, -5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, -5 ; X/-5 == 1 --> x == -5
+ %B = icmp eq i64 %A, 1
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp5_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
+ %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
+ ret <2 x i1> %B
+}
+
+define i1 @sdiv_icmp6(i64 %X) {
+; CHECK-LABEL: @sdiv_icmp6(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = sdiv exact i64 %X, -5 ; X/-5 == -1 --> x == 5
+ %B = icmp eq i64 %A, -1
+ ret i1 %B
+}
+
+define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) {
+; CHECK-LABEL: @sdiv_icmp6_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
+ %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>
+ ret <2 x i1> %B
+}
+
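
The icmp folds above all follow from the meaning of the exact flag: it promises the division leaves no remainder, so an equality against a small constant pins down the dividend itself. For example, "X sdiv-exact 5 == -1" can only hold when X == -5, and the division disappears. A minimal standalone sketch of that before/after pair (plain i64 and the constant 5 assumed):

define i1 @exact_sdiv_icmp_sketch(i64 %X) {
  ; before:  %A = sdiv exact i64 %X, 5
  ;          %B = icmp eq i64 %A, -1
  ; after the fold only the compare against -5 remains:
  %B = icmp eq i64 %X, -5
  ret i1 %B
}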
diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
new file mode 100644
index 00000000000..b6a56b9a9a7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -0,0 +1,99 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=CHECK -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=LDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=NOLDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefix=INTRINSIC -check-prefix=NOLDEXP -check-prefix=NOLDEXPF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare double @exp2(double)
+declare float @exp2f(float)
+
+; Check exp2(sitofp(x)) -> ldexp(1.0, sext(x)).
+
+define double @test_simplify1(i32 %x) {
+; CHECK-LABEL: @test_simplify1(
+ %conv = sitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify2(i16 signext %x) {
+; CHECK-LABEL: @test_simplify2(
+ %conv = sitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify3(i8 signext %x) {
+; CHECK-LABEL: @test_simplify3(
+ %conv = sitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify4(i32 %x) {
+; CHECK-LABEL: @test_simplify4(
+ %conv = sitofp i32 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
+
+; Check exp2(uitofp(x)) -> ldexp(1.0, zext(x)).
+
+define double @test_no_simplify1(i32 %x) {
+; CHECK-LABEL: @test_no_simplify1(
+ %conv = uitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @exp2
+ ret double %ret
+}
+
+define double @test_simplify6(i16 zeroext %x) {
+; CHECK-LABEL: @test_simplify6(
+ %conv = uitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify7(i8 zeroext %x) {
+; CHECK-LABEL: @test_simplify7(
+ %conv = uitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify8(i8 zeroext %x) {
+; CHECK-LABEL: @test_simplify8(
+ %conv = uitofp i8 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
+
+declare double @llvm.exp2.f64(double)
+declare float @llvm.exp2.f32(float)
+
+define double @test_simplify9(i8 zeroext %x) {
+; INTRINSIC-LABEL: @test_simplify9(
+ %conv = uitofp i8 %x to double
+ %ret = call double @llvm.exp2.f64(double %conv)
+; LDEXP: call double @ldexp
+; NOLDEXP-NOT: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify10(i8 zeroext %x) {
+; INTRINSIC-LABEL: @test_simplify10(
+ %conv = uitofp i8 %x to float
+ %ret = call float @llvm.exp2.f32(float %conv)
+; LDEXPF: call float @ldexpf
+; NOLDEXPF-NOT: call float @ldexpf
+ ret float %ret
+}
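
The exp2-to-ldexp rewrites being checked rest on the identity 2^n == ldexp(1.0, n) for integral n, so exp2(sitofp(x)) (or uitofp from a type narrow enough never to overflow the exponent argument) can call the cheaper libm routine directly. A minimal standalone sketch of the rewritten form, assuming the usual C prototype double ldexp(double, int):

declare double @ldexp(double, i32)

define double @exp2_of_i32_as_ldexp_sketch(i32 %x) {
  ; exp2(sitofp i32 %x to double) == ldexp(1.0, %x) == 2.0 raised to %x
  %r = call double @ldexp(double 1.000000e+00, i32 %x)
  ret double %r
}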
diff --git a/llvm/test/Transforms/InstCombine/exp2-2.ll b/llvm/test/Transforms/InstCombine/exp2-2.ll
new file mode 100644
index 00000000000..19368dc48c6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/exp2-2.ll
@@ -0,0 +1,17 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare float @exp2(double)
+
+; Check that exp2 functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(i32 %x) {
+; CHECK-LABEL: @test_no_simplify1(
+ %conv = sitofp i32 %x to double
+ %ret = call float @exp2(double %conv)
+; CHECK: call float @exp2(double %conv)
+ ret float %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
new file mode 100644
index 00000000000..5d6a3a1c355
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -0,0 +1,312 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ANY,LE
+; RUN: opt < %s -instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ANY,BE
+
+define i32 @extractelement_out_of_range(<2 x i32> %x) {
+; ANY-LABEL: @extractelement_out_of_range(
+; ANY-NEXT: ret i32 undef
+;
+ %E1 = extractelement <2 x i32> %x, i8 16
+ ret i32 %E1
+}
+
+define i32 @extractelement_type_out_of_range(<2 x i32> %x) {
+; ANY-LABEL: @extractelement_type_out_of_range(
+; ANY-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i128 0
+; ANY-NEXT: ret i32 [[E1]]
+;
+ %E1 = extractelement <2 x i32> %x, i128 0
+ ret i32 %E1
+}
+
+define i32 @bitcasted_inselt_equal_num_elts(float %f) {
+; ANY-LABEL: @bitcasted_inselt_equal_num_elts(
+; ANY-NEXT: [[R:%.*]] = bitcast float [[F:%.*]] to i32
+; ANY-NEXT: ret i32 [[R]]
+;
+ %vf = insertelement <4 x float> undef, float %f, i32 0
+ %vi = bitcast <4 x float> %vf to <4 x i32>
+ %r = extractelement <4 x i32> %vi, i32 0
+ ret i32 %r
+}
+
+define i64 @test2(i64 %in) {
+; ANY-LABEL: @test2(
+; ANY-NEXT: ret i64 [[IN:%.*]]
+;
+ %vec = insertelement <8 x i64> undef, i64 %in, i32 0
+ %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
+ %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+ %r = extractelement <8 x i64> %add, i32 0
+ ret i64 %r
+}
+
+define i32 @bitcasted_inselt_wide_source_zero_elt(i64 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
+; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_zero_elt(
+; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
+; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; BE-NEXT: ret i32 [[R]]
+;
+ %i = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
+ %b = bitcast <2 x i64> %i to <4 x i32>
+ %r = extractelement <4 x i32> %b, i32 0
+ ret i32 %r
+}
+
+define i16 @bitcasted_inselt_wide_source_modulo_elt(i64 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
+; LE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT: ret i16 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_modulo_elt(
+; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i16
+; BE-NEXT: ret i16 [[R]]
+;
+ %i = insertelement <2 x i64> undef, i64 %x, i32 1
+ %b = bitcast <2 x i64> %i to <8 x i16>
+ %r = extractelement <8 x i16> %b, i32 4
+ ret i16 %r
+}
+
+define i32 @bitcasted_inselt_wide_source_not_modulo_elt(i64 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
+; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
+; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt(
+; BE-NEXT: [[R:%.*]] = trunc i64 [[X:%.*]] to i32
+; BE-NEXT: ret i32 [[R]]
+;
+ %i = insertelement <2 x i64> undef, i64 %x, i32 0
+ %b = bitcast <2 x i64> %i to <4 x i32>
+ %r = extractelement <4 x i32> %b, i32 1
+ ret i32 %r
+}
+
+define i8 @bitcasted_inselt_wide_source_not_modulo_elt_not_half(i32 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
+; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
+; LE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
+; LE-NEXT: ret i8 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half(
+; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 8
+; BE-NEXT: [[R:%.*]] = trunc i32 [[TMP1]] to i8
+; BE-NEXT: ret i8 [[R]]
+;
+ %i = insertelement <2 x i32> undef, i32 %x, i32 0
+ %b = bitcast <2 x i32> %i to <8 x i8>
+ %r = extractelement <8 x i8> %b, i32 2
+ ret i8 %r
+}
+
+define i3 @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(i15 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
+; LE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 3
+; LE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
+; LE-NEXT: ret i3 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_not_modulo_elt_not_half_weird_types(
+; BE-NEXT: [[TMP1:%.*]] = lshr i15 [[X:%.*]], 9
+; BE-NEXT: [[R:%.*]] = trunc i15 [[TMP1]] to i3
+; BE-NEXT: ret i3 [[R]]
+;
+ %i = insertelement <3 x i15> undef, i15 %x, i32 0
+ %b = bitcast <3 x i15> %i to <15 x i3>
+ %r = extractelement <15 x i3> %b, i32 1
+ ret i3 %r
+}
+
+; Negative test for the above fold, but we can remove the insert here.
+
+define i8 @bitcasted_inselt_wide_source_wrong_insert(<2 x i32> %v, i32 %x) {
+; ANY-LABEL: @bitcasted_inselt_wide_source_wrong_insert(
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
+; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 2
+; ANY-NEXT: ret i8 [[R]]
+;
+ %i = insertelement <2 x i32> %v, i32 %x, i32 1
+ %b = bitcast <2 x i32> %i to <8 x i8>
+ %r = extractelement <8 x i8> %b, i32 2
+ ret i8 %r
+}
+
+; Partial negative test for the above fold, extra uses are not allowed if shift is needed.
+
+declare void @use(<8 x i8>)
+
+define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
+; LE-LABEL: @bitcasted_inselt_wide_source_uses(
+; LE-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
+; LE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
+; LE-NEXT: call void @use(<8 x i8> [[B]])
+; LE-NEXT: [[R:%.*]] = extractelement <8 x i8> [[B]], i32 3
+; LE-NEXT: ret i8 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_wide_source_uses(
+; BE-NEXT: [[I:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0
+; BE-NEXT: [[B:%.*]] = bitcast <2 x i32> [[I]] to <8 x i8>
+; BE-NEXT: call void @use(<8 x i8> [[B]])
+; BE-NEXT: [[R:%.*]] = trunc i32 [[X]] to i8
+; BE-NEXT: ret i8 [[R]]
+;
+ %i = insertelement <2 x i32> undef, i32 %x, i32 0
+ %b = bitcast <2 x i32> %i to <8 x i8>
+ call void @use(<8 x i8> %b)
+ %r = extractelement <8 x i8> %b, i32 3
+ ret i8 %r
+}
+
+define float @bitcasted_inselt_to_FP(i64 %x) {
+; LE-LABEL: @bitcasted_inselt_to_FP(
+; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
+; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float
+; LE-NEXT: ret float [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_to_FP(
+; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; BE-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float
+; BE-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x i64> undef, i64 %x, i32 0
+ %b = bitcast <2 x i64> %i to <4 x float>
+ %r = extractelement <4 x float> %b, i32 1
+ ret float %r
+}
+
+declare void @use_v2i128(<2 x i128>)
+declare void @use_v8f32(<8 x float>)
+
+define float @bitcasted_inselt_to_FP_uses(i128 %x) {
+; ANY-LABEL: @bitcasted_inselt_to_FP_uses(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> undef, i128 [[X:%.*]], i32 0
+; ANY-NEXT: call void @use_v2i128(<2 x i128> [[I]])
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
+; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
+; ANY-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x i128> undef, i128 %x, i32 0
+ call void @use_v2i128(<2 x i128> %i)
+ %b = bitcast <2 x i128> %i to <8 x float>
+ %r = extractelement <8 x float> %b, i32 1
+ ret float %r
+}
+
+define float @bitcasted_inselt_to_FP_uses2(i128 %x) {
+; ANY-LABEL: @bitcasted_inselt_to_FP_uses2(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x i128> undef, i128 [[X:%.*]], i32 0
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x i128> [[I]] to <8 x float>
+; ANY-NEXT: call void @use_v8f32(<8 x float> [[B]])
+; ANY-NEXT: [[R:%.*]] = extractelement <8 x float> [[B]], i32 1
+; ANY-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x i128> undef, i128 %x, i32 0
+ %b = bitcast <2 x i128> %i to <8 x float>
+ call void @use_v8f32(<8 x float> %b)
+ %r = extractelement <8 x float> %b, i32 1
+ ret float %r
+}
+
+define i32 @bitcasted_inselt_from_FP(double %x) {
+; LE-LABEL: @bitcasted_inselt_from_FP(
+; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
+; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32
+; LE-NEXT: ret i32 [[R]]
+;
+; BE-LABEL: @bitcasted_inselt_from_FP(
+; BE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
+; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
+; BE-NEXT: ret i32 [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ %b = bitcast <2 x double> %i to <4 x i32>
+ %r = extractelement <4 x i32> %b, i32 1
+ ret i32 %r
+}
+
+declare void @use_v2f64(<2 x double>)
+declare void @use_v8i16(<8 x i16>)
+
+define i16 @bitcasted_inselt_from_FP_uses(double %x) {
+; ANY-LABEL: @bitcasted_inselt_from_FP_uses(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
+; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
+; ANY-NEXT: ret i16 [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ call void @use_v2f64(<2 x double> %i)
+ %b = bitcast <2 x double> %i to <8 x i16>
+ %r = extractelement <8 x i16> %b, i32 1
+ ret i16 %r
+}
+
+define i16 @bitcasted_inselt_from_FP_uses2(double %x) {
+; ANY-LABEL: @bitcasted_inselt_from_FP_uses2(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <8 x i16>
+; ANY-NEXT: call void @use_v8i16(<8 x i16> [[B]])
+; ANY-NEXT: [[R:%.*]] = extractelement <8 x i16> [[B]], i32 1
+; ANY-NEXT: ret i16 [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ %b = bitcast <2 x double> %i to <8 x i16>
+ call void @use_v8i16(<8 x i16> %b)
+ %r = extractelement <8 x i16> %b, i32 1
+ ret i16 %r
+}
+
+define float @bitcasted_inselt_to_and_from_FP(double %x) {
+; ANY-LABEL: @bitcasted_inselt_to_and_from_FP(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
+; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ANY-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ %b = bitcast <2 x double> %i to <4 x float>
+ %r = extractelement <4 x float> %b, i32 1
+ ret float %r
+}
+
+define float @bitcasted_inselt_to_and_from_FP_uses(double %x) {
+; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; ANY-NEXT: call void @use_v2f64(<2 x double> [[I]])
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
+; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ANY-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ call void @use_v2f64(<2 x double> %i)
+ %b = bitcast <2 x double> %i to <4 x float>
+ %r = extractelement <4 x float> %b, i32 1
+ ret float %r
+}
+
+declare void @use_v4f32(<4 x float>)
+
+define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) {
+; ANY-LABEL: @bitcasted_inselt_to_and_from_FP_uses2(
+; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x float>
+; ANY-NEXT: call void @use_v4f32(<4 x float> [[B]])
+; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
+; ANY-NEXT: ret float [[R]]
+;
+ %i = insertelement <2 x double> undef, double %x, i32 0
+ %b = bitcast <2 x double> %i to <4 x float>
+ call void @use_v4f32(<4 x float> %b)
+ %r = extractelement <4 x float> %b, i32 1
+ ret float %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/extractinsert-tbaa.ll b/llvm/test/Transforms/InstCombine/extractinsert-tbaa.ll
new file mode 100644
index 00000000000..b2a3a1a1bf9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/extractinsert-tbaa.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+%Complex = type { double, double }
+
+; Check that instcombine preserves TBAA when narrowing loads
+define double @teststructextract(%Complex *%val) {
+; CHECK: load double, {{.*}}, !tbaa
+; CHECK-NOT: load %Complex
+ %loaded = load %Complex, %Complex *%val, !tbaa !1
+ %real = extractvalue %Complex %loaded, 0
+ ret double %real
+}
+
+define double @testarrayextract([2 x double] *%val) {
+; CHECK: load double, {{.*}}, !tbaa
+; CHECK-NOT: load [2 x double]
+ %loaded = load [2 x double], [2 x double] *%val, !tbaa !1
+ %real = extractvalue [2 x double] %loaded, 0
+ ret double %real
+}
+
+; Check that instcombine preserves TBAA when breaking up stores
+define void @teststructinsert(%Complex *%loc, double %a, double %b) {
+; CHECK: store double %a, {{.*}}, !tbaa
+; CHECK: store double %b, {{.*}}, !tbaa
+; CHECK-NOT: store %Complex
+ %inserted = insertvalue %Complex undef, double %a, 0
+ %inserted2 = insertvalue %Complex %inserted, double %b, 1
+ store %Complex %inserted2, %Complex *%loc, !tbaa !1
+ ret void
+}
+
+define void @testarrayinsert([2 x double] *%loc, double %a, double %b) {
+; CHECK: store double %a, {{.*}}, !tbaa
+; CHECK: store double %b, {{.*}}, !tbaa
+; CHECK-NOT: store [2 x double]
+ %inserted = insertvalue [2 x double] undef, double %a, 0
+ %inserted2 = insertvalue [2 x double] %inserted, double %b, 1
+ store [2 x double] %inserted2, [2 x double] *%loc, !tbaa !1
+ ret void
+}
+
+!0 = !{!"tbaa_root"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"Complex", !0, i64 0}
diff --git a/llvm/test/Transforms/InstCombine/extractvalue.ll b/llvm/test/Transforms/InstCombine/extractvalue.ll
new file mode 100644
index 00000000000..9c293581a06
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/extractvalue.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @bar({i32, i32} %a)
+declare i32 @baz(i32 %a)
+
+; CHECK-LABEL: define i32 @foo(
+; CHECK-NOT: extractvalue
+define i32 @foo(i32 %a, i32 %b) {
+; Instcombine should fold various combinations of insertvalue and extractvalue
+; together
+ ; Build a simple struct and pull values out again
+ %s1.1 = insertvalue {i32, i32} undef, i32 %a, 0
+ %s1 = insertvalue {i32, i32} %s1.1, i32 %b, 1
+ %v1 = extractvalue {i32, i32} %s1, 0
+ %v2 = extractvalue {i32, i32} %s1, 1
+
+ ; Build a nested struct and pull a sub struct out of it
+ ; This requires instcombine to insert a few insertvalue instructions
+ %ns1.1 = insertvalue {i32, {i32, i32}} undef, i32 %v1, 0
+ %ns1.2 = insertvalue {i32, {i32, i32}} %ns1.1, i32 %v1, 1, 0
+ %ns1 = insertvalue {i32, {i32, i32}} %ns1.2, i32 %v2, 1, 1
+ %s2 = extractvalue {i32, {i32, i32}} %ns1, 1
+ %v3 = extractvalue {i32, {i32, i32}} %ns1, 1, 1
+ call void @bar({i32, i32} %s2)
+
+ ; Use nested extractvalues to get to a value
+ %s3 = extractvalue {i32, {i32, i32}} %ns1, 1
+ %v4 = extractvalue {i32, i32} %s3, 1
+ call void @bar({i32, i32} %s3)
+
+ ; Use nested insertvalues to build a nested struct
+ %s4.1 = insertvalue {i32, i32} undef, i32 %v3, 0
+ %s4 = insertvalue {i32, i32} %s4.1, i32 %v4, 1
+ %ns2 = insertvalue {i32, {i32, i32}} undef, {i32, i32} %s4, 1
+
+ ; And now extract a single value from there
+ %v5 = extractvalue {i32, {i32, i32}} %ns2, 1, 1
+
+ ret i32 %v5
+}
+
+; CHECK-LABEL: define i32 @extract2gep(
+; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %pair, i64 0, i32 1
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
+; CHECK-NEXT: store
+; CHECK-NEXT: br label %loop
+; CHECK-NOT: extractvalue
+; CHECK: call {{.*}}(i32 [[LOAD]])
+; CHECK-NOT: extractvalue
+; CHECK: ret i32 [[LOAD]]
+define i32 @extract2gep({i16, i32}* %pair, i32* %P) {
+ ; The load + extractvalue should be converted
+ ; to an inbounds gep + smaller load.
+ ; The new load should be in the same spot as the old load.
+ %L = load {i16, i32}, {i16, i32}* %pair
+ store i32 0, i32* %P
+ br label %loop
+
+loop:
+ %E = extractvalue {i16, i32} %L, 1
+ %C = call i32 @baz(i32 %E)
+ store i32 %C, i32* %P
+ %cond = icmp eq i32 %C, 0
+ br i1 %cond, label %end, label %loop
+
+end:
+ ret i32 %E
+}
+
+; CHECK-LABEL: define i16 @doubleextract2gep(
+; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i64 0, i32 1, i32 1
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i16, i16* [[GEP]]
+; CHECK-NEXT: ret i16 [[LOAD]]
+define i16 @doubleextract2gep({i16, {i32, i16}}* %arg) {
+ ; The load + extractvalues should be converted
+ ; to a 3-index inbounds gep + smaller load.
+ %L = load {i16, {i32, i16}}, {i16, {i32, i16}}* %arg
+ %E1 = extractvalue {i16, {i32, i16}} %L, 1
+ %E2 = extractvalue {i32, i16} %E1, 1
+ ret i16 %E2
+}
+
+; CHECK: define i32 @nogep-multiuse
+; CHECK-NEXT: load {{.*}} %pair
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+define i32 @nogep-multiuse({i32, i32}* %pair) {
+ ; The load should be left unchanged since both parts are needed.
+ %L = load volatile {i32, i32}, {i32, i32}* %pair
+ %LHS = extractvalue {i32, i32} %L, 0
+ %RHS = extractvalue {i32, i32} %L, 1
+ %R = add i32 %LHS, %RHS
+ ret i32 %R
+}
+
+; CHECK: define i32 @nogep-volatile
+; CHECK-NEXT: load volatile {{.*}} %pair
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: ret
+define i32 @nogep-volatile({i32, i32}* %pair) {
+ ; The load volatile should be left unchanged.
+ %L = load volatile {i32, i32}, {i32, i32}* %pair
+ %E = extractvalue {i32, i32} %L, 1
+ ret i32 %E
+}
diff --git a/llvm/test/Transforms/InstCombine/fabs-libcall.ll b/llvm/test/Transforms/InstCombine/fabs-libcall.ll
new file mode 100644
index 00000000000..90902bb2fd0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fabs-libcall.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s
+
+declare x86_fp80 @fabsl(x86_fp80)
+
+define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) {
+; CHECK-LABEL: @replace_fabs_call_f80(
+; CHECK-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 [[TMP1]]
+;
+ %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x)
+ ret x86_fp80 %fabsl
+}
+
+define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) {
+; CHECK-LABEL: @fmf_replace_fabs_call_f80(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x)
+; CHECK-NEXT: ret x86_fp80 [[TMP1]]
+;
+ %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x)
+ ret x86_fp80 %fabsl
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
new file mode 100644
index 00000000000..2dcdc52210a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -0,0 +1,420 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s
+
+; Make sure libcalls are replaced with intrinsic calls.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+declare float @fabsf(float)
+declare double @fabs(double)
+declare fp128 @fabsl(fp128)
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.fmuladd.f32(float, float, float)
+
+define float @replace_fabs_call_f32(float %x) {
+; CHECK-LABEL: @replace_fabs_call_f32(
+; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[FABSF]]
+;
+ %fabsf = tail call float @fabsf(float %x)
+ ret float %fabsf
+}
+
+define double @replace_fabs_call_f64(double %x) {
+; CHECK-LABEL: @replace_fabs_call_f64(
+; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %fabs = tail call double @fabs(double %x)
+ ret double %fabs
+}
+
+define fp128 @replace_fabs_call_f128(fp128 %x) {
+; CHECK-LABEL: @replace_fabs_call_f128(
+; CHECK-NEXT: [[FABSL:%.*]] = call fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
+; CHECK-NEXT: ret fp128 [[FABSL]]
+;
+ %fabsl = tail call fp128 @fabsl(fp128 %x)
+ ret fp128 %fabsl
+}
+
+; Make sure fast math flags are preserved when replacing the libcall.
+define float @fmf_replace_fabs_call_f32(float %x) {
+; CHECK-LABEL: @fmf_replace_fabs_call_f32(
+; CHECK-NEXT: [[FABSF:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[FABSF]]
+;
+ %fabsf = tail call nnan float @fabsf(float %x)
+ ret float %fabsf
+}
+
+; Make sure all intrinsic calls are eliminated when the input is known
+; positive.
+
+; The fabs cannot be eliminated because %x may be a NaN
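+; (x * x is never negative, but if %x is a NaN the product is a NaN whose
+; sign bit is not guaranteed to be clear, and fabs would still clear it.)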
+
+define float @square_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_fabs_intrinsic_f32(
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[FABSF:%.*]] = tail call float @llvm.fabs.f32(float [[MUL]])
+; CHECK-NEXT: ret float [[FABSF]]
+;
+ %mul = fmul float %x, %x
+ %fabsf = tail call float @llvm.fabs.f32(float %mul)
+ ret float %fabsf
+}
+
+define double @square_fabs_intrinsic_f64(double %x) {
+; CHECK-LABEL: @square_fabs_intrinsic_f64(
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
+; CHECK-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[MUL]])
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %mul = fmul double %x, %x
+ %fabs = tail call double @llvm.fabs.f64(double %mul)
+ ret double %fabs
+}
+
+define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
+; CHECK-LABEL: @square_fabs_intrinsic_f128(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fp128 [[X:%.*]], [[X]]
+; CHECK-NEXT: [[FABSL:%.*]] = tail call fp128 @llvm.fabs.f128(fp128 [[MUL]])
+; CHECK-NEXT: ret fp128 [[FABSL]]
+;
+ %mul = fmul fp128 %x, %x
+ %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+ ret fp128 %fabsl
+}
+
+define float @square_nnan_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_nnan_fabs_intrinsic_f32(
+; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float [[X:%.*]], [[X]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %mul = fmul nnan float %x, %x
+ %fabsf = call float @llvm.fabs.f32(float %mul)
+ ret float %fabsf
+}
+
+; Shrinking a library call to a smaller type should neither inhibit the square optimization nor be inhibited by it.
+
+define float @square_fabs_shrink_call1(float %x) {
+; CHECK-LABEL: @square_fabs_shrink_call1(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
+; CHECK-NEXT: ret float [[TRUNC]]
+;
+ %ext = fpext float %x to double
+ %sq = fmul double %ext, %ext
+ %fabs = call double @fabs(double %sq)
+ %trunc = fptrunc double %fabs to float
+ ret float %trunc
+}
+
+define float @square_fabs_shrink_call2(float %x) {
+; CHECK-LABEL: @square_fabs_shrink_call2(
+; CHECK-NEXT: [[SQ:%.*]] = fmul float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.fabs.f32(float [[SQ]])
+; CHECK-NEXT: ret float [[TRUNC]]
+;
+ %sq = fmul float %x, %x
+ %ext = fpext float %sq to double
+ %fabs = call double @fabs(double %ext)
+ %trunc = fptrunc double %fabs to float
+ ret float %trunc
+}
+
+define float @fabs_select_constant_negative_positive(i32 %c) {
+; CHECK-LABEL: @fabs_select_constant_negative_positive(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT: [[FABS:%.*]] = select i1 [[CMP]], float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float [[FABS]]
+;
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, float -1.0, float 2.0
+ %fabs = call float @llvm.fabs.f32(float %select)
+ ret float %fabs
+}
+
+define float @fabs_select_constant_positive_negative(i32 %c) {
+; CHECK-LABEL: @fabs_select_constant_positive_negative(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT: [[FABS:%.*]] = select i1 [[CMP]], float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float [[FABS]]
+;
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, float 1.0, float -2.0
+ %fabs = call float @llvm.fabs.f32(float %select)
+ ret float %fabs
+}
+
+define float @fabs_select_constant_negative_negative(i32 %c) {
+; CHECK-LABEL: @fabs_select_constant_negative_negative(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT: [[FABS:%.*]] = select i1 [[CMP]], float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float [[FABS]]
+;
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, float -1.0, float -2.0
+ %fabs = call float @llvm.fabs.f32(float %select)
+ ret float %fabs
+}
+
+define float @fabs_select_constant_neg0(i32 %c) {
+; CHECK-LABEL: @fabs_select_constant_neg0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, float -0.0, float 0.0
+ %fabs = call float @llvm.fabs.f32(float %select)
+ ret float %fabs
+}
+
+define float @fabs_select_var_constant_negative(i32 %c, float %x) {
+; CHECK-LABEL: @fabs_select_var_constant_negative(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], float [[X:%.*]], float -1.000000e+00
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SELECT]])
+; CHECK-NEXT: ret float [[FABS]]
+;
+ %cmp = icmp eq i32 %c, 0
+ %select = select i1 %cmp, float %x, float -1.0
+ %fabs = call float @llvm.fabs.f32(float %select)
+ ret float %fabs
+}
+
+; The fabs cannot be eliminated because %x may be a NaN
+
+define float @square_fma_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[X]], float 1.000000e+00)
+; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[FMA]])
+; CHECK-NEXT: ret float [[FABSF]]
+;
+ %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
+ %fabsf = call float @llvm.fabs.f32(float %fma)
+ ret float %fabsf
+}
+
+; The fabs cannot be eliminated because %x may be a NaN
+
+define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: [[FMA:%.*]] = call nnan float @llvm.fma.f32(float [[X:%.*]], float [[X]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
+ %fabsf = call float @llvm.fabs.f32(float %fma)
+ ret float %fabsf
+}
+
+define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[X]], float 1.000000e+00)
+; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[FMULADD]])
+; CHECK-NEXT: ret float [[FABSF]]
+;
+ %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+ %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+ ret float %fabsf
+}
+
+define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
+; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: [[FMULADD:%.*]] = call nnan float @llvm.fmuladd.f32(float [[X:%.*]], float [[X]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+ %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+ ret float %fabsf
+}
+
+; Don't introduce a second fpext
+
+define double @multi_use_fabs_fpext(float %x) {
+; CHECK-LABEL: @multi_use_fabs_fpext(
+; CHECK-NEXT: [[FPEXT:%.*]] = fpext float [[X:%.*]] to double
+; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[FPEXT]])
+; CHECK-NEXT: store volatile double [[FPEXT]], double* undef, align 8
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %fpext = fpext float %x to double
+ %fabs = call double @llvm.fabs.f64(double %fpext)
+ store volatile double %fpext, double* undef
+ ret double %fabs
+}
+
+; Negative test for the fabs folds below: we require nnan, so
+; we won't always clear the sign bit of a NaN value.
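+; For example, if %x is a NaN with its sign bit set, the compare below is
+; false and the select returns %x unchanged, whereas fabs(%x) would return
+; the same NaN with the sign bit cleared.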
+
+define double @select_fcmp_ole_zero(double %x) {
+; CHECK-LABEL: @select_fcmp_ole_zero(
+; CHECK-NEXT: [[LEZERO:%.*]] = fcmp ole double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEGX:%.*]] = fsub double 0.000000e+00, [[X]]
+; CHECK-NEXT: [[FABS:%.*]] = select i1 [[LEZERO]], double [[NEGX]], double [[X]]
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %lezero = fcmp ole double %x, 0.0
+ %negx = fsub double 0.0, %x
+ %fabs = select i1 %lezero, double %negx, double %x
+ ret double %fabs
+}
+
+; X <= 0.0 ? (0.0 - X) : X --> fabs(X)
+
+define double @select_fcmp_nnan_ole_zero(double %x) {
+; CHECK-LABEL: @select_fcmp_nnan_ole_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %lezero = fcmp nnan ole double %x, 0.0
+ %negx = fsub double 0.0, %x
+ %fabs = select i1 %lezero, double %negx, double %x
+ ret double %fabs
+}
+
+; X <= -0.0 ? (0.0 - X) : X --> fabs(X)
+
+define <2 x float> @select_fcmp_nnan_ole_negzero(<2 x float> %x) {
+; CHECK-LABEL: @select_fcmp_nnan_ole_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
+;
+ %lezero = fcmp nnan ole <2 x float> %x, <float -0.0, float -0.0>
+ %negx = fsub <2 x float> <float 0.0, float undef>, %x
+ %fabs = select <2 x i1> %lezero, <2 x float> %negx, <2 x float> %x
+ ret <2 x float> %fabs
+}
+
+; X > 0.0 ? X : (0.0 - X) --> fabs(X)
+
+define fp128 @select_fcmp_nnan_ogt_zero(fp128 %x) {
+; CHECK-LABEL: @select_fcmp_nnan_ogt_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
+; CHECK-NEXT: ret fp128 [[TMP1]]
+;
+ %gtzero = fcmp nnan ogt fp128 %x, zeroinitializer
+ %negx = fsub fp128 zeroinitializer, %x
+ %fabs = select i1 %gtzero, fp128 %x, fp128 %negx
+ ret fp128 %fabs
+}
+
+; X > -0.0 ? X : (0.0 - X) --> fabs(X)
+
+define half @select_fcmp_nnan_ogt_negzero(half %x) {
+; CHECK-LABEL: @select_fcmp_nnan_ogt_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT: ret half [[TMP1]]
+;
+ %gtzero = fcmp nnan ogt half %x, -0.0
+ %negx = fsub half 0.0, %x
+ %fabs = select i1 %gtzero, half %x, half %negx
+ ret half %fabs
+}
+
+; X < 0.0 ? -X : X --> fabs(X)
+
+define double @select_fcmp_nnan_nsz_olt_zero(double %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_olt_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %ltzero = fcmp nnan nsz olt double %x, 0.0
+ %negx = fsub double -0.0, %x
+ %fabs = select i1 %ltzero, double %negx, double %x
+ ret double %fabs
+}
+
+; X < -0.0 ? -X : X --> fabs(X)
+
+define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_olt_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %ltzero = fcmp nnan nsz ninf olt float %x, -0.0
+ %negx = fsub float -0.0, %x
+ %fabs = select i1 %ltzero, float %negx, float %x
+ ret float %fabs
+}
+
+; X <= 0.0 ? -X : X --> fabs(X)
+
+define double @select_fcmp_nnan_nsz_ole_zero(double %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ole_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %lezero = fcmp fast ole double %x, 0.0
+ %negx = fsub double -0.0, %x
+ %fabs = select i1 %lezero, double %negx, double %x
+ ret double %fabs
+}
+
+; X <= -0.0 ? -X : X --> fabs(X)
+
+define float @select_fcmp_nnan_nsz_ole_negzero(float %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ole_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %lezero = fcmp nnan nsz ole float %x, -0.0
+ %negx = fsub float -0.0, %x
+ %fabs = select i1 %lezero, float %negx, float %x
+ ret float %fabs
+}
+
+; X > 0.0 ? X : (0.0 - X) --> fabs(X)
+
+define <2 x float> @select_fcmp_nnan_nsz_ogt_zero(<2 x float> %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ogt_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz arcp <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT: ret <2 x float> [[TMP1]]
+;
+ %gtzero = fcmp nnan nsz arcp ogt <2 x float> %x, zeroinitializer
+ %negx = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %fabs = select <2 x i1> %gtzero, <2 x float> %x, <2 x float> %negx
+ ret <2 x float> %fabs
+}
+
+; X > -0.0 ? X : (0.0 - X) --> fabs(X)
+
+define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ogt_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT: ret half [[TMP1]]
+;
+ %gtzero = fcmp fast ogt half %x, -0.0
+ %negx = fsub half 0.0, %x
+ %fabs = select i1 %gtzero, half %x, half %negx
+ ret half %fabs
+}
+
+; X >= 0.0 ? X : -X --> fabs(X)
+
+define <2 x double> @select_fcmp_nnan_nsz_oge_zero(<2 x double> %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_oge_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %gezero = fcmp nnan nsz reassoc oge <2 x double> %x, zeroinitializer
+ %negx = fsub <2 x double> <double -0.0, double -0.0>, %x
+ %fabs = select <2 x i1> %gezero, <2 x double> %x, <2 x double> %negx
+ ret <2 x double> %fabs
+}
+
+; X >= -0.0 ? X : -X --> fabs(X)
+
+define half @select_fcmp_nnan_nsz_oge_negzero(half %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_oge_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT: ret half [[TMP1]]
+;
+ %gezero = fcmp nnan nsz oge half %x, -0.0
+ %negx = fsub half -0.0, %x
+ %fabs = select i1 %gezero, half %x, half %negx
+ ret half %fabs
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fadd-fsub-factor.ll b/llvm/test/Transforms/InstCombine/fadd-fsub-factor.ll
new file mode 100644
index 00000000000..09104e53e06
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fadd-fsub-factor.ll
@@ -0,0 +1,473 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; =========================================================================
+;
+; Test FP factorization with patterns:
+; X * Z + Y * Z --> (X + Y) * Z (including all 4 commuted variants)
+; X * Z - Y * Z --> (X - Y) * Z (including all 4 commuted variants)
+; X / Z + Y / Z --> (X + Y) / Z
+; X / Z - Y / Z --> (X - Y) / Z
+;
+; =========================================================================
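+
+; These folds can change the rounding of intermediate results (hence
+; 'reassoc') and can change the sign of a zero result (hence 'nsz').
+; For example, with X = 1.0, Y = -1.0, Z = -0.0:
+;   X*Z + Y*Z = -0.0 + 0.0 = 0.0, but (X + Y) * Z = 0.0 * -0.0 = -0.0.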
+
+; Minimum FMF - the final result requires/propagates FMF.
+
+define float @fmul_fadd(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %x, %z
+ %t2 = fmul float %y, %z
+ %r = fadd reassoc nsz float %t1, %t2
+ ret float %r
+}
+
+; Verify vector types and commuted operands.
+
+define <2 x float> @fmul_fadd_commute1_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fmul_fadd_commute1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = fmul <2 x float> %z, %x
+ %t2 = fmul <2 x float> %z, %y
+ %r = fadd reassoc nsz <2 x float> %t1, %t2
+ ret <2 x float> %r
+}
+
+; Verify vector types, commuted operands, FMF propagation.
+
+define <2 x float> @fmul_fadd_commute2_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fmul_fadd_commute2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc ninf nsz <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc ninf nsz <2 x float> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = fmul fast <2 x float> %x, %z
+ %t2 = fmul nnan <2 x float> %z, %y
+ %r = fadd reassoc nsz ninf <2 x float> %t1, %t2
+ ret <2 x float> %r
+}
+
+; Verify different scalar type, commuted operands, FMF propagation.
+
+define double @fmul_fadd_commute3(double %x, double %y, double %z) {
+; CHECK-LABEL: @fmul_fadd_commute3(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nnan nsz double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nnan nsz double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %t1 = fmul double %z, %x
+ %t2 = fmul fast double %y, %z
+ %r = fadd reassoc nsz nnan double %t1, %t2
+ ret double %r
+}
+
+; Negative test - verify the fold is not done with only 'reassoc' ('nsz' is required).
+
+define float @fmul_fadd_not_enough_FMF(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fadd_not_enough_FMF(
+; CHECK-NEXT: [[T1:%.*]] = fmul fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul fast float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul fast float %x, %z
+ %t2 = fmul fast float %y, %z
+ %r = fadd reassoc float %t1, %t2
+ ret float %r
+}
+
+declare void @use(float)
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fadd_uses1(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fadd_uses1(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %z, %x
+ %t2 = fmul float %y, %z
+ %r = fadd reassoc nsz float %t1, %t2
+ call void @use(float %t1)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fadd_uses2(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fadd_uses2(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Z]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %z, %x
+ %t2 = fmul float %z, %y
+ %r = fadd reassoc nsz float %t1, %t2
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fadd_uses3(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fadd_uses3(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Z]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %x, %z
+ %t2 = fmul float %z, %y
+ %r = fadd reassoc nsz float %t1, %t2
+ call void @use(float %t1)
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Minimum FMF - the final result requires/propagates FMF.
+
+define half @fmul_fsub(half %x, half %y, half %z) {
+; CHECK-LABEL: @fmul_fsub(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz half [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz half [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret half [[R]]
+;
+ %t1 = fmul half %x, %z
+ %t2 = fmul half %y, %z
+ %r = fsub reassoc nsz half %t1, %t2
+ ret half %r
+}
+
+; Verify vector types and commuted operands.
+
+define <2 x float> @fmul_fsub_commute1_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fmul_fsub_commute1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = fmul <2 x float> %z, %x
+ %t2 = fmul <2 x float> %y, %z
+ %r = fsub reassoc nsz <2 x float> %t1, %t2
+ ret <2 x float> %r
+}
+
+; Verify vector types, commuted operands, FMF propagation.
+
+define <2 x float> @fmul_fsub_commute2_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fmul_fsub_commute2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc ninf nsz <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc ninf nsz <2 x float> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = fmul fast <2 x float> %x, %z
+ %t2 = fmul nnan <2 x float> %z, %y
+ %r = fsub reassoc nsz ninf <2 x float> %t1, %t2
+ ret <2 x float> %r
+}
+
+; Verify different scalar type, commuted operands, FMF propagation.
+
+define double @fmul_fsub_commute3(double %x, double %y, double %z) {
+; CHECK-LABEL: @fmul_fsub_commute3(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nnan nsz double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nnan nsz double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %t1 = fmul double %z, %x
+ %t2 = fmul fast double %z, %y
+ %r = fsub reassoc nsz nnan double %t1, %t2
+ ret double %r
+}
+
+; Negative test - verify the fold is not done with only 'nsz' ('reassoc' is required).
+
+define float @fmul_fsub_not_enough_FMF(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fsub_not_enough_FMF(
+; CHECK-NEXT: [[T1:%.*]] = fmul fast float [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul fast float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub nsz float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul fast float %z, %x
+ %t2 = fmul fast float %y, %z
+ %r = fsub nsz float %t1, %t2
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fsub_uses1(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fsub_uses1(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %x, %z
+ %t2 = fmul float %y, %z
+ %r = fsub reassoc nsz float %t1, %t2
+ call void @use(float %t1)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fsub_uses2(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fsub_uses2(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Z]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %z, %x
+ %t2 = fmul float %z, %y
+ %r = fsub reassoc nsz float %t1, %t2
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fmul_fsub_uses3(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmul_fsub_uses3(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fmul float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fmul float %x, %z
+ %t2 = fmul float %y, %z
+ %r = fsub reassoc nsz float %t1, %t2
+ call void @use(float %t1)
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Common divisor
+
+define double @fdiv_fadd(double %x, double %y, double %z) {
+; CHECK-LABEL: @fdiv_fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz double [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %t1 = fdiv double %x, %z
+ %t2 = fdiv double %y, %z
+ %r = fadd reassoc nsz double %t1, %t2
+ ret double %r
+}
+
+define float @fdiv_fsub(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fsub(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %x, %z
+ %t2 = fdiv nnan float %y, %z
+ %r = fsub reassoc nsz float %t1, %t2
+ ret float %r
+}
+
+; Verify vector types.
+
+define <2 x double> @fdiv_fadd_vec(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
+; CHECK-LABEL: @fdiv_fadd_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x double> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %t1 = fdiv fast <2 x double> %x, %z
+ %t2 = fdiv <2 x double> %y, %z
+ %r = fadd reassoc nsz <2 x double> %t1, %t2
+ ret <2 x double> %r
+}
+
+; Verify vector types.
+
+define <2 x float> @fdiv_fsub_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fdiv_fsub_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fdiv reassoc nsz <2 x float> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = fdiv <2 x float> %x, %z
+ %t2 = fdiv nnan <2 x float> %y, %z
+ %r = fsub reassoc nsz <2 x float> %t1, %t2
+ ret <2 x float> %r
+}
+
+; Negative test - the common operand is not the divisor.
+
+define float @fdiv_fadd_commute1(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fadd_commute1(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Z]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %z, %y
+ %t2 = fdiv fast float %z, %x
+ %r = fadd fast float %t1, %t2
+ ret float %r
+}
+
+; Negative test - the common operand is not the divisor.
+
+define float @fdiv_fsub_commute2(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fsub_commute2(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[X:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub fast float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %z, %y
+ %t2 = fdiv fast float %x, %z
+ %r = fsub fast float %t1, %t2
+ ret float %r
+}
+
+; Negative test - verify the fold is not done with only 'nsz' ('reassoc' is required).
+
+define float @fdiv_fadd_not_enough_FMF(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fadd_not_enough_FMF(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[T3:%.*]] = fadd nsz float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv fast float %y, %x
+ %t2 = fdiv fast float %z, %x
+ %t3 = fadd nsz float %t1, %t2
+ ret float %t3
+}
+
+; Negative test - verify the fold is not done with only 'reassoc' ('nsz' is required).
+
+define float @fdiv_fsub_not_enough_FMF(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fsub_not_enough_FMF(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv fast float %y, %x
+ %t2 = fdiv fast float %z, %x
+ %t3 = fsub reassoc float %t1, %t2
+ ret float %t3
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fdiv_fadd_uses1(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fadd_uses1(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %x, %z
+ %t2 = fdiv fast float %y, %z
+ %r = fadd fast float %t1, %t2
+ call void @use(float %t1)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fdiv_fsub_uses2(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fsub_uses2(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub fast float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %x, %z
+ %t2 = fdiv fast float %y, %z
+ %r = fsub fast float %t1, %t2
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Negative test - extra uses should disable the fold.
+
+define float @fdiv_fsub_uses3(float %x, float %y, float %z) {
+; CHECK-LABEL: @fdiv_fsub_uses3(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[R:%.*]] = fsub fast float [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float %x, %z
+ %t2 = fdiv fast float %y, %z
+ %r = fsub fast float %t1, %t2
+ call void @use(float %t1)
+ call void @use(float %t2)
+ ret float %r
+}
+
+; Constants are fine to combine if they are not denorms.
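+; In the test below, 0x3810000000000000 is 2^-126 (the smallest normal float)
+; and 0x3800000000000000 is 2^-127, so the combined constant 2^-126 + 2^-127 =
+; 1.5 * 2^-126 (0x3818000000000000) is still a normal float and the fold fires.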
+
+define float @fdiv_fadd_not_denorm(float %x) {
+; CHECK-LABEL: @fdiv_fadd_not_denorm(
+; CHECK-NEXT: [[R:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float 0x3810000000000000, %x
+ %t2 = fdiv fast float 0x3800000000000000, %x
+ %r = fadd fast float %t1, %t2
+ ret float %r
+}
+
+; Negative test - disabled if x+y is denormal.
+
+define float @fdiv_fadd_denorm(float %x) {
+; CHECK-LABEL: @fdiv_fadd_denorm(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float 0xB810000000000000, [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float 0xB810000000000000, %x
+ %t2 = fdiv fast float 0x3800000000000000, %x
+ %r = fadd fast float %t1, %t2
+ ret float %r
+}
+
+; Negative test - disabled if x-y is denormal.
+
+define float @fdiv_fsub_denorm(float %x) {
+; CHECK-LABEL: @fdiv_fsub_denorm(
+; CHECK-NEXT: [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
+; CHECK-NEXT: [[R:%.*]] = fsub fast float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv fast float 0x3810000000000000, %x
+ %t2 = fdiv fast float 0x3800000000000000, %x
+ %r = fsub fast float %t1, %t2
+ ret float %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fadd.ll b/llvm/test/Transforms/InstCombine/fadd.ll
new file mode 100644
index 00000000000..f7eac8e14b6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fadd.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; -x + y => y - x
+
+define float @fneg_op0(float %x, float %y) {
+; CHECK-LABEL: @fneg_op0(
+; CHECK-NEXT: [[ADD:%.*]] = fsub float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %neg = fsub float -0.0, %x
+ %add = fadd float %neg, %y
+ ret float %add
+}
+
+; x + -y => x - y
+
+define float @fneg_op1(float %x, float %y) {
+; CHECK-LABEL: @fneg_op1(
+; CHECK-NEXT: [[ADD:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %neg = fsub float -0.0, %y
+ %add = fadd float %x, %neg
+ ret float %add
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
new file mode 100644
index 00000000000..b5173cc467c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -0,0 +1,931 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
+; 1.2f and 2.3f are supposed to be folded together.
+define float @fold(float %a) {
+; CHECK-LABEL: @fold(
+; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
+; CHECK-NEXT: ret float [[MUL1]]
+;
+ %mul = fmul fast float %a, 0x3FF3333340000000
+ %mul1 = fmul fast float %mul, 0x4002666660000000
+ ret float %mul1
+}
+
+; Same testing-case as the one used in fold() except that the second multiply
+; has no fast-math flags, so the constants cannot be combined.
+define float @notfold(float %a) {
+; CHECK-LABEL: @notfold(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
+; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT: ret float [[MUL1]]
+;
+ %mul = fmul fast float %a, 0x3FF3333340000000
+ %mul1 = fmul float %mul, 0x4002666660000000
+ ret float %mul1
+}
+
+define float @fold2(float %a) {
+; CHECK-LABEL: @fold2(
+; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
+; CHECK-NEXT: ret float [[MUL1]]
+;
+ %mul = fmul float %a, 0x3FF3333340000000
+ %mul1 = fmul fast float %mul, 0x4002666660000000
+ ret float %mul1
+}
+
+; C * f1 + f1 = (C+1) * f1
+; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
+; always safe, and so doesn't need any FMF.
+; That is, (x + x + x) and (3*x) each have only a single rounding.
+define double @fold3(double %f1) {
+; CHECK-LABEL: @fold3(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %t1 = fmul fast double 5.000000e+00, %f1
+ %t2 = fadd fast double %f1, %t1
+ ret double %t2
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define double @fold3_reassoc_nsz(double %f1) {
+; CHECK-LABEL: @fold3_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %t1 = fmul reassoc nsz double 5.000000e+00, %f1
+ %t2 = fadd reassoc nsz double %f1, %t1
+ ret double %t2
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to f1 * 6.0.
+define double @fold3_reassoc(double %f1) {
+; CHECK-LABEL: @fold3_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc double [[T1]], [[F1]]
+; CHECK-NEXT: ret double [[T2]]
+;
+ %t1 = fmul reassoc double 5.000000e+00, %f1
+ %t2 = fadd reassoc double %f1, %t1
+ ret double %t2
+}
+
+; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
+define float @fold4(float %f1, float %f2) {
+; CHECK-LABEL: @fold4(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %sub = fsub float 4.000000e+00, %f1
+ %sub1 = fsub float 5.000000e+00, %f2
+ %add = fadd fast float %sub, %sub1
+ ret float %add
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @fold4_reassoc_nsz(float %f1, float %f2) {
+; CHECK-LABEL: @fold4_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %sub = fsub float 4.000000e+00, %f1
+ %sub1 = fsub float 5.000000e+00, %f2
+ %add = fadd reassoc nsz float %sub, %sub1
+ ret float %add
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to (9.0 - (f1 + f2)).
+define float @fold4_reassoc(float %f1, float %f2) {
+; CHECK-LABEL: @fold4_reassoc(
+; CHECK-NEXT: [[SUB:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
+; CHECK-NEXT: [[SUB1:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd reassoc float [[SUB]], [[SUB1]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %sub = fsub float 4.000000e+00, %f1
+ %sub1 = fsub float 5.000000e+00, %f2
+ %add = fadd reassoc float %sub, %sub1
+ ret float %add
+}
+
+; (X + C1) + C2 => X + (C1 + C2)
+define float @fold5(float %f1) {
+; CHECK-LABEL: @fold5(
+; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
+; CHECK-NEXT: ret float [[ADD1]]
+;
+ %add = fadd float %f1, 4.000000e+00
+ %add1 = fadd fast float %add, 5.000000e+00
+ ret float %add1
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @fold5_reassoc_nsz(float %f1) {
+; CHECK-LABEL: @fold5_reassoc_nsz(
+; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
+; CHECK-NEXT: ret float [[ADD1]]
+;
+ %add = fadd float %f1, 4.000000e+00
+ %add1 = fadd reassoc nsz float %add, 5.000000e+00
+ ret float %add1
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to f1 + 9.0
+define float @fold5_reassoc(float %f1) {
+; CHECK-LABEL: @fold5_reassoc(
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
+; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
+; CHECK-NEXT: ret float [[ADD1]]
+;
+ %add = fadd float %f1, 4.000000e+00
+ %add1 = fadd reassoc float %add, 5.000000e+00
+ ret float %add1
+}
+
+; (X + X) + X + X => 4.0 * X
+define float @fold6(float %f1) {
+; CHECK-LABEL: @fold6(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fadd fast float %f1, %f1
+ %t2 = fadd fast float %f1, %t1
+ %t3 = fadd fast float %t2, %f1
+ ret float %t3
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @fold6_reassoc_nsz(float %f1) {
+; CHECK-LABEL: @fold6_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fadd reassoc nsz float %f1, %f1
+ %t2 = fadd reassoc nsz float %f1, %t1
+ %t3 = fadd reassoc nsz float %t2, %f1
+ ret float %t3
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to f1 * 4.0.
+define float @fold6_reassoc(float %f1) {
+; CHECK-LABEL: @fold6_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[T1]], [[F1]]
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T2]], [[F1]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fadd reassoc float %f1, %f1
+ %t2 = fadd reassoc float %f1, %t1
+ %t3 = fadd reassoc float %t2, %f1
+ ret float %t3
+}
+
+; C1 * X + (X + X) = (C1 + 2) * X
+define float @fold7(float %f1) {
+; CHECK-LABEL: @fold7(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fmul fast float %f1, 5.000000e+00
+ %t2 = fadd fast float %f1, %f1
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @fold7_reassoc_nsz(float %f1) {
+; CHECK-LABEL: @fold7_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fmul reassoc nsz float %f1, 5.000000e+00
+ %t2 = fadd reassoc nsz float %f1, %f1
+ %t3 = fadd reassoc nsz float %t1, %t2
+ ret float %t3
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to f1 * 7.0.
+define float @fold7_reassoc(float %f1) {
+; CHECK-LABEL: @fold7_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul reassoc float %f1, 5.000000e+00
+ %t2 = fadd reassoc float %f1, %f1
+ %t3 = fadd reassoc float %t1, %t2
+ ret float %t3
+}
+
+; (X + X) + (X + X) + X => 5.0 * X
+define float @fold8(float %f1) {
+; CHECK-LABEL: @fold8(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fadd fast float %f1, %f1
+ %t2 = fadd fast float %f1, %f1
+ %t3 = fadd fast float %t1, %t2
+ %t4 = fadd fast float %t3, %f1
+ ret float %t4
+}
+
+; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
+define float @fold8_reassoc_nsz(float %f1) {
+; CHECK-LABEL: @fold8_reassoc_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fadd reassoc nsz float %f1, %f1
+ %t2 = fadd reassoc nsz float %f1, %f1
+ %t3 = fadd reassoc nsz float %t1, %t2
+ %t4 = fadd reassoc nsz float %t3, %f1
+ ret float %t4
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to f1 * 5.0.
+define float @fold8_reassoc(float %f1) {
+; CHECK-LABEL: @fold8_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[F1]], [[F1]]
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: [[T4:%.*]] = fadd reassoc float [[T3]], [[F1]]
+; CHECK-NEXT: ret float [[T4]]
+;
+ %t1 = fadd reassoc float %f1, %f1
+ %t2 = fadd reassoc float %f1, %f1
+ %t3 = fadd reassoc float %t1, %t2
+ %t4 = fadd reassoc float %t3, %f1
+ ret float %t4
+}
+
+; Y - (X + Y) --> -X
+
+define float @fsub_fadd_common_op_fneg(float %x, float %y) {
+; CHECK-LABEL: @fsub_fadd_common_op_fneg(
+; CHECK-NEXT: [[R:%.*]] = fsub fast float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %a = fadd float %x, %y
+ %r = fsub fast float %y, %a
+ ret float %r
+}
+
+; Y - (X + Y) --> -X
+; Check again with 'reassoc' and 'nsz'.
+; nsz is required because: 0.0 - (0.0 + 0.0) -> 0.0, not -0.0
+
+define float @fsub_fadd_common_op_fneg_reassoc_nsz(float %x, float %y) {
+; CHECK-LABEL: @fsub_fadd_common_op_fneg_reassoc_nsz(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %a = fadd float %x, %y
+ %r = fsub reassoc nsz float %y, %a
+ ret float %r
+}
+
+; Y - (X + Y) --> -X
+
+define <2 x float> @fsub_fadd_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fsub_fadd_common_op_fneg_vec(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %a = fadd <2 x float> %x, %y
+ %r = fsub nsz reassoc <2 x float> %y, %a
+ ret <2 x float> %r
+}
+
+; Y - (Y + X) --> -X
+; Commute operands of the 'add'.
+
+define float @fsub_fadd_common_op_fneg_commute(float %x, float %y) {
+; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %a = fadd float %y, %x
+ %r = fsub reassoc nsz float %y, %a
+ ret float %r
+}
+
+; Y - (Y + X) --> -X
+
+define <2 x float> @fsub_fadd_common_op_fneg_commute_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fsub_fadd_common_op_fneg_commute_vec(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %a = fadd <2 x float> %y, %x
+ %r = fsub reassoc nsz <2 x float> %y, %a
+ ret <2 x float> %r
+}
+
+; (Y - X) - Y --> -X
+; nsz is required because: (0.0 - 0.0) - 0.0 -> 0.0, not -0.0
+
+define float @fsub_fsub_common_op_fneg(float %x, float %y) {
+; CHECK-LABEL: @fsub_fsub_common_op_fneg(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %s = fsub float %y, %x
+ %r = fsub reassoc nsz float %s, %y
+ ret float %r
+}
+
+; (Y - X) - Y --> -X
+
+define <2 x float> @fsub_fsub_common_op_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fsub_fsub_common_op_fneg_vec(
+; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %s = fsub <2 x float> %y, %x
+ %r = fsub reassoc nsz <2 x float> %s, %y
+ ret <2 x float> %r
+}
+
+; TODO: This doesn't require 'nsz'. It should fold to 0 - f2
+define float @fold9_reassoc(float %f1, float %f2) {
+; CHECK-LABEL: @fold9_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float [[F1]], [[T1]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fadd float %f1, %f2
+ %t3 = fsub reassoc float %f1, %t1
+ ret float %t3
+}
+
+; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
+; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
+; top of resulting simplified expression tree may potentially reveal some
+; optimization opportunities in the super-expression trees.
+;
+define float @fold10(float %f1, float %f2) {
+; CHECK-LABEL: @fold10(
+; CHECK-NEXT: [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fadd fast float 2.000000e+00, %f1
+ %t2 = fsub fast float %f2, 3.000000e+00
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+}
+
+; Check again with 'reassoc' and 'nsz'.
+; TODO: We may be able to remove the 'nsz' requirement.
+define float @fold10_reassoc_nsz(float %f1, float %f2) {
+; CHECK-LABEL: @fold10_reassoc_nsz(
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fadd reassoc nsz float 2.000000e+00, %f1
+ %t2 = fsub reassoc nsz float %f2, 3.000000e+00
+ %t3 = fadd reassoc nsz float %t1, %t2
+ ret float %t3
+}
+
+; Observe that the fold is not done with only reassoc (the instructions are
+; canonicalized, but not folded).
+; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
+define float @fold10_reassoc(float %f1, float %f2) {
+; CHECK-LABEL: @fold10_reassoc(
+; CHECK-NEXT: [[T1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[T1]], [[T2]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fadd reassoc float 2.000000e+00, %f1
+ %t2 = fsub reassoc float %f2, 3.000000e+00
+ %t3 = fadd reassoc float %t1, %t2
+ ret float %t3
+}
+
+; This used to crash/miscompile.
+
+define float @fail1(float %f1, float %f2) {
+; CHECK-LABEL: @fail1(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %conv3 = fadd fast float %f1, -1.000000e+00
+ %add = fadd fast float %conv3, %conv3
+ %add2 = fadd fast float %add, %conv3
+ ret float %add2
+}
+
+define double @fail2(double %f1, double %f2) {
+; CHECK-LABEL: @fail2(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %t1 = fsub fast double %f1, %f2
+ %t2 = fadd fast double %f1, %f2
+ %t3 = fsub fast double %t1, %t2
+ ret double %t3
+}
+
+; (X * C) - X --> X * (C - 1.0)
+
+define float @fsub_op0_fmul_const(float %x) {
+; CHECK-LABEL: @fsub_op0_fmul_const(
+; CHECK-NEXT: [[SUB:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT: ret float [[SUB]]
+;
+ %mul = fmul float %x, 7.0
+ %sub = fsub reassoc nsz float %mul, %x
+ ret float %sub
+}
+
+; (X * C) - X --> X * (C - 1.0)
+
+define <2 x float> @fsub_op0_fmul_const_vec(<2 x float> %x) {
+; CHECK-LABEL: @fsub_op0_fmul_const_vec(
+; CHECK-NEXT: [[SUB:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float 6.000000e+00, float -4.300000e+01>
+; CHECK-NEXT: ret <2 x float> [[SUB]]
+;
+ %mul = fmul <2 x float> %x, <float 7.0, float -42.0>
+ %sub = fsub reassoc nsz <2 x float> %mul, %x
+ ret <2 x float> %sub
+}
+
+; X - (X * C) --> X * (1.0 - C)
+
+define float @fsub_op1_fmul_const(float %x) {
+; CHECK-LABEL: @fsub_op1_fmul_const(
+; CHECK-NEXT: [[SUB:%.*]] = fmul reassoc nsz float [[X:%.*]], -6.000000e+00
+; CHECK-NEXT: ret float [[SUB]]
+;
+ %mul = fmul float %x, 7.0
+ %sub = fsub reassoc nsz float %x, %mul
+ ret float %sub
+}
+
+; X - (X * C) --> X * (1.0 - C)
+
+define <2 x float> @fsub_op1_fmul_const_vec(<2 x float> %x) {
+; CHECK-LABEL: @fsub_op1_fmul_const_vec(
+; CHECK-NEXT: [[SUB:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float -6.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[SUB]]
+;
+ %mul = fmul <2 x float> %x, <float 7.0, float 0.0>
+ %sub = fsub reassoc nsz <2 x float> %x, %mul
+ ret <2 x float> %sub
+}
+
+; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+
+define float @fsub_op0_fmul_const_wrong_FMF(float %x) {
+; CHECK-LABEL: @fsub_op0_fmul_const_wrong_FMF(
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
+; CHECK-NEXT: [[SUB:%.*]] = fsub reassoc float [[MUL]], [[X]]
+; CHECK-NEXT: ret float [[SUB]]
+;
+ %mul = fmul reassoc float %x, 7.0
+ %sub = fsub reassoc float %mul, %x
+ ret float %sub
+}
+
+; select C, (X + Y), (X - Y) => X + (select C, Y, -Y)
+; This is always safe. No FMF required.
+define float @fold16(float %x, float %y) {
+; CHECK-LABEL: @fold16(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
+; CHECK-NEXT: [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = fadd float [[R_P]], [[X]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp = fcmp ogt float %x, %y
+ %plus = fadd float %x, %y
+ %minus = fsub float %x, %y
+ %r = select i1 %cmp, float %plus, float %minus
+ ret float %r
+}
+
+; =========================================================================
+;
+; Test cases for negation
+;
+; =========================================================================
+define float @fneg1(float %f1, float %f2) {
+; CHECK-LABEL: @fneg1(
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub = fsub float -0.000000e+00, %f1
+ %sub1 = fsub nsz float 0.000000e+00, %f2
+ %mul = fmul float %sub, %sub1
+ ret float %mul
+}
+
+define float @fneg2(float %x) {
+; CHECK-LABEL: @fneg2(
+; CHECK-NEXT: [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[SUB]]
+;
+ %sub = fsub nsz float 0.0, %x
+ ret float %sub
+}
+
+define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
+; CHECK-LABEL: @fneg2_vec_undef(
+; CHECK-NEXT: [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[SUB]]
+;
+ %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
+ ret <2 x float> %sub
+}
+
+; =========================================================================
+;
+; Test cases for fdiv
+;
+; =========================================================================
+
+; X/C1 / C2 => X * (1/(C2*C1))
+define float @fdiv1(float %x) {
+; CHECK-LABEL: @fdiv1(
+; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
+; CHECK-NEXT: ret float [[DIV1]]
+;
+ %div = fdiv float %x, 0x3FF3333340000000
+ %div1 = fdiv fast float %div, 0x4002666660000000
+ ret float %div1
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FD7303B60000000 = 0.36231884057971014492
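+; i.e. the reciprocal of the product: 1.0 / (1.2 * 2.3) = 1.0 / 2.76,
+; evaluated in single precision.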
+}
+
+; X*C1 / C2 => X * (C1/C2)
+define float @fdiv2(float %x) {
+; CHECK-LABEL: @fdiv2(
+; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
+; CHECK-NEXT: ret float [[DIV1]]
+;
+ %mul = fmul float %x, 0x3FF3333340000000
+ %div1 = fdiv fast float %mul, 0x4002666660000000
+ ret float %div1
+
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FE0B21660000000 = 0.52173918485641479492
+}
+
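+; Same fold with a non-splat vector: each lane's C1/C2 is exact
+; (6.0/2.0 and 9.0/3.0), so the result is a multiply by a splat of 3.0.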
+define <2 x float> @fdiv2_vec(<2 x float> %x) {
+; CHECK-LABEL: @fdiv2_vec(
+; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV1]]
+;
+ %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
+ %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
+ ret <2 x float> %div1
+}
+
+; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal
+;
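+; (Here the divisor 0x47EFFFFFE0000000 is FLT_MAX, and 1/(2.3 * FLT_MAX) is a
+; single-precision denormal, so only the 1/2.3 factor becomes a multiply.)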
+define float @fdiv3(float %x) {
+; CHECK-LABEL: @fdiv3(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 0x3FDBD37A80000000
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast float [[TMP1]], 0x47EFFFFFE0000000
+; CHECK-NEXT: ret float [[DIV1]]
+;
+ %div = fdiv float %x, 0x47EFFFFFE0000000
+ %div1 = fdiv fast float %div, 0x4002666660000000
+ ret float %div1
+}
+
+; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
+define float @fdiv4(float %x) {
+; CHECK-LABEL: @fdiv4(
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %mul = fmul float %x, 0x47EFFFFFE0000000
+ %div = fdiv float %mul, 0x3FC99999A0000000
+ ret float %div
+}
+
+; =========================================================================
+;
+; Test-cases for square root
+;
+; =========================================================================
+
+; A squared factor fed into a square root intrinsic should be hoisted out
+; as a fabs() value.
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_intrinsic_arg_squared(double %x) {
+; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %mul = fmul fast double %x, %x
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul)
+ ret double %sqrt
+}
+
+; Check all 6 combinations of a 3-way multiplication tree where
+; one factor is repeated.
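+; In every case the sqrt operand is (x*x)*y in some association, so the
+; expected result is fabs(x) * sqrt(y).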
+
+define double @sqrt_intrinsic_three_args1(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args1(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %y, %x
+ %mul2 = fmul fast double %mul, %x
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+define double @sqrt_intrinsic_three_args2(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args2(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %x, %y
+ %mul2 = fmul fast double %mul, %x
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+define double @sqrt_intrinsic_three_args3(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args3(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %y
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+define double @sqrt_intrinsic_three_args4(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args4(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %y, %x
+ %mul2 = fmul fast double %x, %mul
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+define double @sqrt_intrinsic_three_args5(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args5(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %x, %y
+ %mul2 = fmul fast double %x, %mul
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+define double @sqrt_intrinsic_three_args6(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_three_args6(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %y, %mul
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
+; If any operation is not 'fast', we can't simplify.
+
+define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
+; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
+; CHECK-NEXT: ret double [[SQRT]]
+;
+ %mul = fmul double %x, %x
+ %mul2 = fmul fast double %mul, %y
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
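+; sqrt((x*x) * (x*x)) --> x*x; no fabs is needed because x*x is never negative.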
+define double @sqrt_intrinsic_arg_4th(double %x) {
+; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %mul
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+}
+
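+; sqrt(x*x*x*x*x) --> (x*x) * sqrt(x) under fast-math.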
+define double @sqrt_intrinsic_arg_5th(double %x) {
+; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %mul = fmul fast double %x, %x
+ %mul2 = fmul fast double %mul, %x
+ %mul3 = fmul fast double %mul2, %mul
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
+ ret double %sqrt
+}
+
+; Check that square root calls have the same behavior.
+
+declare float @sqrtf(float)
+declare double @sqrt(double)
+declare fp128 @sqrtl(fp128)
+
+define float @sqrt_call_squared_f32(float %x) {
+; CHECK-LABEL: @sqrt_call_squared_f32(
+; CHECK-NEXT: [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[FABS]]
+;
+ %mul = fmul fast float %x, %x
+ %sqrt = call fast float @sqrtf(float %mul)
+ ret float %sqrt
+}
+
+define double @sqrt_call_squared_f64(double %x) {
+; CHECK-LABEL: @sqrt_call_squared_f64(
+; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[FABS]]
+;
+ %mul = fmul fast double %x, %x
+ %sqrt = call fast double @sqrt(double %mul)
+ ret double %sqrt
+}
+
+define fp128 @sqrt_call_squared_f128(fp128 %x) {
+; CHECK-LABEL: @sqrt_call_squared_f128(
+; CHECK-NEXT: [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
+; CHECK-NEXT: ret fp128 [[FABS]]
+;
+ %mul = fmul fast fp128 %x, %x
+ %sqrt = call fast fp128 @sqrtl(fp128 %mul)
+ ret fp128 %sqrt
+}
+
+; =========================================================================
+;
+; Test-cases for fmin / fmax
+;
+; =========================================================================
+
+declare double @fmax(double, double)
+declare double @fmin(double, double)
+declare float @fmaxf(float, float)
+declare float @fminf(float, float)
+declare fp128 @fmaxl(fp128, fp128)
+declare fp128 @fminl(fp128, fp128)
+
+; 'nnan' (no NaNs) is the minimum requirement to replace these calls.
+; It should always be set when unsafe-fp-math is true, but the tests
+; alternate between 'nnan' and 'fast' for additional coverage.
+; 'nsz' is implied by the definition of fmax or fmin itself.
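+; (Why 'nnan' matters: libm fmax(x, NaN) returns x, but the fcmp+select
+; replacement would return the NaN operand.)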
+
+; Shrink and remove the call.
+define float @max1(float %a, float %b) {
+; CHECK-LABEL: @max1(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call fast double @fmax(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+}
+
+define float @max2(float %a, float %b) {
+; CHECK-LABEL: @max2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %c = call nnan float @fmaxf(float %a, float %b)
+ ret float %c
+}
+
+
+define double @max3(double %a, double %b) {
+; CHECK-LABEL: @max3(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %c = call fast double @fmax(double %a, double %b)
+ ret double %c
+}
+
+define fp128 @max4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: @max4(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
+; CHECK-NEXT: ret fp128 [[TMP2]]
+;
+ %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
+ ret fp128 %c
+}
+
+; Shrink and remove the call.
+define float @min1(float %a, float %b) {
+; CHECK-LABEL: @min1(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call nnan double @fmin(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+}
+
+define float @min2(float %a, float %b) {
+; CHECK-LABEL: @min2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %c = call fast float @fminf(float %a, float %b)
+ ret float %c
+}
+
+define double @min3(double %a, double %b) {
+; CHECK-LABEL: @min3(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %c = call nnan double @fmin(double %a, double %b)
+ ret double %c
+}
+
+define fp128 @min4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: @min4(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
+; CHECK-NEXT: ret fp128 [[TMP2]]
+;
+ %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
+ ret fp128 %c
+}
+
+; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
+; This is always safe. No FMF required.
+define float @test55(i1 %which, float %a) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
+; CHECK-NEXT: ret float [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi float [ 2.0, %entry ], [ %a, %delay ]
+ %value = fadd float %A, 1.0
+ ret float %value
+}
diff --git a/llvm/test/Transforms/InstCombine/fcmp-select.ll b/llvm/test/Transforms/InstCombine/fcmp-select.ll
new file mode 100644
index 00000000000..7fc59bbcb7d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp-select.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(i1)
+
+; X == 42.0 ? X : 42.0 --> 42.0
+
+define double @oeq(double %x) {
+; CHECK-LABEL: @oeq(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: ret double 4.200000e+01
+;
+ %cmp = fcmp oeq double %x, 42.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double %x, double 42.0
+ ret double %cond
+}
+
+; X == 42.0 ? 42.0 : X --> X
+
+define float @oeq_swapped(float %x) {
+; CHECK-LABEL: @oeq_swapped(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: ret float [[X]]
+;
+ %cmp = fcmp oeq float %x, 42.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, float 42.0, float %x
+ ret float %cond
+}
+
+; x != y ? x : y -> x when the predicate is 'une' (so the compare is only
+; false when the operands are ordered and equal) and at least one of x and y
+; is known not to be negative zero.
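+; For example, with x = -0.0 and y = +0.0, 'fcmp une x, y' is false (the two
+; zeros compare equal), so the select returns +0.0, while the folded form
+; would return x = -0.0.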
+
+; X != 42.0 ? X : 42.0 --> X
+
+define double @une(double %x) {
+; CHECK-LABEL: @une(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: ret double [[X]]
+;
+ %cmp = fcmp une double %x, 42.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double %x, double 42.0
+ ret double %cond
+}
+
+; X != 42.0 ? 42.0 : X --> 42.0
+
+define double @une_swapped(double %x) {
+; CHECK-LABEL: @une_swapped(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: ret double 4.200000e+01
+;
+ %cmp = fcmp une double %x, 42.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double 42.0, double %x
+ ret double %cond
+}
+
+define double @une_could_be_negzero(double %x, double %y) {
+; CHECK-LABEL: @une_could_be_negzero(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[X]], double [[Y]]
+; CHECK-NEXT: ret double [[COND]]
+;
+ %cmp = fcmp une double %x, %y
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double %x, double %y
+ ret double %cond
+}
+
+define double @une_swapped_could_be_negzero(double %x, double %y) {
+; CHECK-LABEL: @une_swapped_could_be_negzero(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[Y]], double [[X]]
+; CHECK-NEXT: ret double [[COND]]
+;
+ %cmp = fcmp une double %x, %y
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double %y, double %x
+ ret double %cond
+}
+
+define double @one(double %x) {
+; CHECK-LABEL: @one(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one double [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[X]], double -1.000000e+00
+; CHECK-NEXT: ret double [[COND]]
+;
+ %cmp = fcmp one double %x, -1.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double %x, double -1.0
+ ret double %cond
+}
+
+define double @one_swapped(double %x) {
+; CHECK-LABEL: @one_swapped(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one double [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: call void @use(i1 [[CMP]])
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double -1.000000e+00, double [[X]]
+; CHECK-NEXT: ret double [[COND]]
+;
+ %cmp = fcmp one double %x, -1.0
+ call void @use(i1 %cmp) ; extra use to thwart predicate canonicalization
+ %cond = select i1 %cmp, double -1.0, double %x
+ ret double %cond
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fcmp-special.ll b/llvm/test/Transforms/InstCombine/fcmp-special.ll
new file mode 100644
index 00000000000..490dab5f24d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp-special.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @oeq_self(double %arg) {
+; CHECK-LABEL: @oeq_self(
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[ARG:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %tmp = fcmp oeq double %arg, %arg
+ ret i1 %tmp
+}
+
+; PR1111 - https://bugs.llvm.org/show_bug.cgi?id=1111
+
+define i1 @une_self(double %x) {
+; CHECK-LABEL: @une_self(
+; CHECK-NEXT: [[TMP:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %tmp = fcmp une double %x, %x
+ ret i1 %tmp
+}
+
+; When just checking for a NaN (ORD/UNO), canonicalize constants.
+; Float/double are alternated for additional coverage.
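+; 'fcmp ord x, C' with a non-NaN constant C is true exactly when x is not NaN,
+; so the constant operand can be canonicalized to 0.0 (and likewise for 'uno').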
+
+define i1 @ord_zero(float %x) {
+; CHECK-LABEL: @ord_zero(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp ord float %x, 0.0
+ ret i1 %f
+}
+
+define i1 @ord_nonzero(double %x) {
+; CHECK-LABEL: @ord_nonzero(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp ord double %x, 3.0
+ ret i1 %f
+}
+
+define i1 @ord_self(float %x) {
+; CHECK-LABEL: @ord_self(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp ord float %x, %x
+ ret i1 %f
+}
+
+define i1 @uno_zero(double %x) {
+; CHECK-LABEL: @uno_zero(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp uno double %x, 0.0
+ ret i1 %f
+}
+
+define i1 @uno_nonzero(float %x) {
+; CHECK-LABEL: @uno_nonzero(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp uno float %x, 3.0
+ ret i1 %f
+}
+
+define i1 @uno_self(double %x) {
+; CHECK-LABEL: @uno_self(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %f = fcmp uno double %x, %x
+ ret i1 %f
+}
+
+define <2 x i1> @ord_zero_vec(<2 x double> %x) {
+; CHECK-LABEL: @ord_zero_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x double> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp ord <2 x double> %x, zeroinitializer
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @ord_nonzero_vec(<2 x float> %x) {
+; CHECK-LABEL: @ord_nonzero_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp ord <2 x float> %x, <float 3.0, float 5.0>
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @ord_self_vec(<2 x double> %x) {
+; CHECK-LABEL: @ord_self_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x double> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp ord <2 x double> %x, %x
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @uno_zero_vec(<2 x float> %x) {
+; CHECK-LABEL: @uno_zero_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp uno <2 x float> %x, zeroinitializer
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @uno_nonzero_vec(<2 x double> %x) {
+; CHECK-LABEL: @uno_nonzero_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp uno <2 x double> %x, <double 3.0, double 5.0>
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @uno_self_vec(<2 x float> %x) {
+; CHECK-LABEL: @uno_self_vec(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp uno <2 x float> %x, %x
+ ret <2 x i1> %f
+}
+
+; If a scalar constant is NaN in any of the above tests, it would have been eliminated by InstSimplify.
+; If a vector has a NaN element, we don't do anything with it.
+
+define <2 x i1> @uno_vec_with_nan(<2 x double> %x) {
+; CHECK-LABEL: @uno_vec_with_nan(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> [[X:%.*]], <double 3.000000e+00, double 0x7FF00000FFFFFFFF>
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp uno <2 x double> %x, <double 3.0, double 0x7FF00000FFFFFFFF>
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @uno_vec_with_undef(<2 x double> %x) {
+; CHECK-LABEL: @uno_vec_with_undef(
+; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp uno <2 x double> %x, <double 3.0, double undef>
+ ret <2 x i1> %f
+}
+
+define <2 x i1> @ord_vec_with_undef(<2 x double> %x) {
+; CHECK-LABEL: @ord_vec_with_undef(
+; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x double> [[X:%.*]], <double 0.000000e+00, double undef>
+; CHECK-NEXT: ret <2 x i1> [[F]]
+;
+ %f = fcmp ord <2 x double> %x, <double 0.0, double undef>
+ ret <2 x i1> %f
+}
+
+; TODO: This could be handled in InstSimplify.
+
+define i1 @nnan_ops_to_fcmp_ord(float %x, float %y) {
+; CHECK-LABEL: @nnan_ops_to_fcmp_ord(
+; CHECK-NEXT: ret i1 true
+;
+ %mul = fmul nnan float %x, %y
+ %div = fdiv nnan float %x, %y
+ %cmp = fcmp ord float %mul, %div
+ ret i1 %cmp
+}
+
+; TODO: This could be handled in InstSimplify.
+
+define i1 @nnan_ops_to_fcmp_uno(float %x, float %y) {
+; CHECK-LABEL: @nnan_ops_to_fcmp_uno(
+; CHECK-NEXT: ret i1 false
+;
+ %mul = fmul nnan float %x, %y
+ %div = fdiv nnan float %x, %y
+ %cmp = fcmp uno float %mul, %div
+ ret i1 %cmp
+}
+
+; TODO: For any predicate/type/FMF, comparison to -0.0 is the same as comparison to +0.0.
+
+define i1 @negative_zero_oeq(float %x) {
+; CHECK-LABEL: @negative_zero_oeq(
+; CHECK-NEXT: [[R:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %r = fcmp oeq float %x, -0.0
+ ret i1 %r
+}
+
+define i1 @negative_zero_oge(double %x) {
+; CHECK-LABEL: @negative_zero_oge(
+; CHECK-NEXT: [[R:%.*]] = fcmp nnan oge double [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %r = fcmp nnan oge double %x, -0.0
+ ret i1 %r
+}
+
+define i1 @negative_zero_uge(half %x) {
+; CHECK-LABEL: @negative_zero_uge(
+; CHECK-NEXT: [[R:%.*]] = fcmp fast uge half [[X:%.*]], 0xH0000
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %r = fcmp fast uge half %x, -0.0
+ ret i1 %r
+}
+
+define <2 x i1> @negative_zero_olt_vec(<2 x float> %x) {
+; CHECK-LABEL: @negative_zero_olt_vec(
+; CHECK-NEXT: [[R:%.*]] = fcmp reassoc ninf olt <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %r = fcmp reassoc ninf olt <2 x float> %x, <float -0.0, float -0.0>
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @negative_zero_une_vec_undef(<2 x double> %x) {
+; CHECK-LABEL: @negative_zero_une_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = fcmp nnan une <2 x double> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %r = fcmp nnan une <2 x double> %x, <double -0.0, double undef>
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @negative_zero_ule_vec_mixed(<2 x float> %x) {
+; CHECK-LABEL: @negative_zero_ule_vec_mixed(
+; CHECK-NEXT: [[R:%.*]] = fcmp ule <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %r = fcmp ule <2 x float> %x, <float 0.0, float -0.0>
+ ret <2 x i1> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
new file mode 100644
index 00000000000..be7aedc7c60
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -0,0 +1,463 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare half @llvm.fabs.f16(half)
+declare double @llvm.fabs.f64(double)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+
+define i1 @fpext_fpext(float %x, float %y) {
+; CHECK-LABEL: @fpext_fpext(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %ext1 = fpext float %x to double
+ %ext2 = fpext float %y to double
+ %cmp = fcmp nnan ogt double %ext1, %ext2
+ ret i1 %cmp
+}
+
+define i1 @fpext_constant(float %a) {
+; CHECK-LABEL: @fpext_constant(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ogt float [[A:%.*]], 1.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %ext = fpext float %a to double
+ %cmp = fcmp ninf ogt double %ext, 1.000000e+00
+ ret i1 %cmp
+}
+
+define <2 x i1> @fpext_constant_vec_splat(<2 x half> %a) {
+; CHECK-LABEL: @fpext_constant_vec_splat(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], <half 0xH5140, half 0xH5140>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %ext = fpext <2 x half> %a to <2 x double>
+ %cmp = fcmp nnan ole <2 x double> %ext, <double 42.0, double 42.0>
+ ret <2 x i1> %cmp
+}
+
+define i1 @fpext_constant_lossy(float %a) {
+; CHECK-LABEL: @fpext_constant_lossy(
+; CHECK-NEXT: [[EXT:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[EXT]], 0x3FF0000000000001
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %ext = fpext float %a to double
+ %cmp = fcmp ogt double %ext, 0x3FF0000000000001 ; more precision than float.
+ ret i1 %cmp
+}
+
+define i1 @fpext_constant_denorm(float %a) {
+; CHECK-LABEL: @fpext_constant_denorm(
+; CHECK-NEXT: [[EXT:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[EXT]], 0x36A0000000000000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %ext = fpext float %a to double
+ %cmp = fcmp ogt double %ext, 0x36A0000000000000 ; denormal in float.
+ ret i1 %cmp
+}
+
+define i1 @fneg_constant_swap_pred(float %x) {
+; CHECK-LABEL: @fneg_constant_swap_pred(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %neg = fsub float -0.0, %x
+ %cmp = fcmp ogt float %neg, 1.0
+ ret i1 %cmp
+}
+
+define <2 x i1> @fneg_constant_swap_pred_vec(<2 x float> %x) {
+; CHECK-LABEL: @fneg_constant_swap_pred_vec(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %neg = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %cmp = fcmp ogt <2 x float> %neg, <float 1.0, float 2.0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fneg_constant_swap_pred_vec_undef(<2 x float> %x) {
+; CHECK-LABEL: @fneg_constant_swap_pred_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %neg = fsub <2 x float> <float undef, float -0.0>, %x
+ %cmp = fcmp ogt <2 x float> %neg, <float 1.0, float 2.0>
+ ret <2 x i1> %cmp
+}
+
+; The new fcmp should have the same FMF as the original.
+
+define i1 @fneg_fmf(float %x) {
+; CHECK-LABEL: @fneg_fmf(
+; CHECK-NEXT: [[R:%.*]] = fcmp fast oeq float [[X:%.*]], -4.200000e+01
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %n = fsub fast float -0.0, %x
+ %r = fcmp fast oeq float %n, 42.0
+ ret i1 %r
+}
+
+; The new fcmp should have the same FMF as the original, vector edition.
+
+define <2 x i1> @fcmp_fneg_fmf_vec(<2 x float> %x) {
+; CHECK-LABEL: @fcmp_fneg_fmf_vec(
+; CHECK-NEXT: [[R:%.*]] = fcmp reassoc nnan ule <2 x float> [[X:%.*]], <float -4.200000e+01, float 1.900000e+01>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %n = fsub nsz <2 x float> zeroinitializer, %x
+ %r = fcmp nnan reassoc uge <2 x float> %n, <float 42.0, float -19.0>
+ ret <2 x i1> %r
+}
+
+define i1 @fneg_fneg_swap_pred(float %x, float %y) {
+; CHECK-LABEL: @fneg_fneg_swap_pred(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp nnan ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %neg1 = fsub float -0.0, %x
+ %neg2 = fsub float -0.0, %y
+ %cmp = fcmp nnan olt float %neg1, %neg2
+ ret i1 %cmp
+}
+
+define <2 x i1> @fneg_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_fneg_swap_pred_vec(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %neg1 = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %neg2 = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %cmp = fcmp ninf olt <2 x float> %neg1, %neg2
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_fneg_swap_pred_vec_undef(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %neg1 = fsub <2 x float> <float -0.0, float undef>, %x
+ %neg2 = fsub <2 x float> <float undef, float -0.0>, %y
+ %cmp = fcmp olt <2 x float> %neg1, %neg2
+ ret <2 x i1> %cmp
+}
+
+define i1 @test7(float %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %ext = fpext float %x to ppc_fp128
+ %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
+ ret i1 %cmp
+}
+
+define float @test8(float %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[CONV2:%.*]] = uitofp i1 [[CMP]] to float
+; CHECK-NEXT: ret float [[CONV2]]
+;
+ %conv = fpext float %x to double
+ %cmp = fcmp olt double %conv, 0.000000e+00
+ %conv1 = zext i1 %cmp to i32
+ %conv2 = sitofp i32 %conv1 to float
+ ret float %conv2
+; Float comparison to zero shouldn't cast to double.
+}
+
+define i1 @fabs_uge(double %a) {
+; CHECK-LABEL: @fabs_uge(
+; CHECK-NEXT: ret i1 true
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp uge double %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_olt(half %a) {
+; CHECK-LABEL: @fabs_olt(
+; CHECK-NEXT: ret i1 false
+;
+ %call = call half @llvm.fabs.f16(half %a)
+ %cmp = fcmp olt half %call, 0.0
+ ret i1 %cmp
+}
+
+define <2 x i1> @fabs_ole(<2 x float> %a) {
+; CHECK-LABEL: @fabs_ole(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf oeq <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp ninf ole <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fabs_ule(<2 x float> %a) {
+; CHECK-LABEL: @fabs_ule(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf arcp ueq <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp ninf arcp ule <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @fabs_ogt(double %a) {
+; CHECK-LABEL: @fabs_ogt(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp reassoc one double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp reassoc ogt double %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_ugt(double %a) {
+; CHECK-LABEL: @fabs_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp reassoc ninf une double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp ninf reassoc ugt double %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_oge(double %a) {
+; CHECK-LABEL: @fabs_oge(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp afn ord double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp afn oge double %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_ult(double %a) {
+; CHECK-LABEL: @fabs_ult(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp reassoc arcp uno double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp reassoc arcp ult double %call, 0.0
+ ret i1 %cmp
+}
+
+define <2 x i1> @fabs_ult_nnan(<2 x float> %a) {
+; CHECK-LABEL: @fabs_ult_nnan(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp nnan reassoc arcp ult <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @fabs_une(half %a) {
+; CHECK-LABEL: @fabs_une(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf une half [[A:%.*]], 0xH0000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call half @llvm.fabs.f16(half %a)
+ %cmp = fcmp ninf une half %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_oeq(double %a) {
+; CHECK-LABEL: @fabs_oeq(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp reassoc ninf oeq double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp ninf reassoc oeq double %call, 0.0
+ ret i1 %cmp
+}
+
+define i1 @fabs_one(double %a) {
+; CHECK-LABEL: @fabs_one(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast one double [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %call = call double @llvm.fabs.f64(double %a)
+ %cmp = fcmp fast one double %call, 0.0
+ ret i1 %cmp
+}
+
+define <2 x i1> @fabs_ueq(<2 x float> %a) {
+; CHECK-LABEL: @fabs_ueq(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp arcp ueq <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp arcp ueq <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fabs_ord(<2 x float> %a) {
+; CHECK-LABEL: @fabs_ord(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp arcp ord <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp arcp ord <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fabs_uno(<2 x float> %a) {
+; CHECK-LABEL: @fabs_uno(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp arcp uno <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %call = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ %cmp = fcmp arcp uno <2 x float> %call, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+; Don't crash.
+define i32 @test17(double %a, double (double)* %p) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[CALL:%.*]] = tail call double [[P:%.*]](double [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double [[CALL]], 0.000000e+00
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call double %p(double %a)
+ %cmp = fcmp ueq double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Can fold fcmp with undef on one side by choosing NaN for the undef
+define i32 @test18_undef_unordered(float %a) {
+; CHECK-LABEL: @test18_undef_unordered(
+; CHECK-NEXT: ret i32 1
+;
+ %cmp = fcmp ueq float %a, undef
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+; Can fold fcmp with undef on one side by choosing NaN for the undef
+define i32 @test18_undef_ordered(float %a) {
+; CHECK-LABEL: @test18_undef_ordered(
+; CHECK-NEXT: ret i32 0
+;
+ %cmp = fcmp oeq float %a, undef
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Can fold fcmp with undef on both sides:
+;   fcmp u_pred undef, undef -> true
+;   fcmp o_pred undef, undef -> false
+; because whatever value is chosen for the first undef,
+; NaN can be chosen for the other undef.
+define i1 @test19_undef_unordered() {
+; CHECK-LABEL: @test19_undef_unordered(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ueq float undef, undef
+ ret i1 %cmp
+}
+
+define i1 @test19_undef_ordered() {
+; CHECK-LABEL: @test19_undef_ordered(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp oeq float undef, undef
+ ret i1 %cmp
+}
+
+; Can fold 1.0 / X < 0.0 --> X < 0 with ninf
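+; (A no-infinities assumption is required: for X = -inf, 1.0/X is -0.0, which
+; is not 'olt' 0.0, but X itself is.)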
+define i1 @test20_recipX_olt_0(float %X) {
+; CHECK-LABEL: @test20_recipX_olt_0(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf olt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float 1.0, %X
+ %cmp = fcmp ninf olt float %div, 0.0
+ ret i1 %cmp
+}
+
+; Can fold -2.0 / X <= 0.0 --> X >= 0 with ninf
+define i1 @test21_recipX_ole_0(float %X) {
+; CHECK-LABEL: @test21_recipX_ole_0(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float -2.0, %X
+ %cmp = fcmp ninf ole float %div, 0.0
+ ret i1 %cmp
+}
+
+; Can fold 2.0 / X > 0.0 --> X > 0 with ninf
+define i1 @test22_recipX_ogt_0(float %X) {
+; CHECK-LABEL: @test22_recipX_ogt_0(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ogt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float 2.0, %X
+ %cmp = fcmp ninf ogt float %div, 0.0
+ ret i1 %cmp
+}
+
+; Can fold -1.0 / X >= 0.0 --> X <= 0 with ninf
+define i1 @test23_recipX_oge_0(float %X) {
+; CHECK-LABEL: @test23_recipX_oge_0(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ole float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float -1.0, %X
+ %cmp = fcmp ninf oge float %div, 0.0
+ ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 when 'ninf' is missing from the compare
+define i1 @test24_recipX_noninf_cmp(float %X) {
+; CHECK-LABEL: @test24_recipX_noninf_cmp(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv ninf float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[DIV]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float 2.0, %X
+ %cmp = fcmp ogt float %div, 0.0
+ ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 when 'ninf' is missing from the division
+define i1 @test25_recipX_noninf_div(float %X) {
+; CHECK-LABEL: @test25_recipX_noninf_div(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ogt float [[DIV]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv float 2.0, %X
+ %cmp = fcmp ninf ogt float %div, 0.0
+ ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 with an unordered predicate
+define i1 @test26_recipX_unorderd(float %X) {
+; CHECK-LABEL: @test26_recipX_unorderd(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv ninf float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf ugt float [[DIV]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %div = fdiv ninf float 2.0, %X
+ %cmp = fcmp ninf ugt float %div, 0.0
+ ret i1 %cmp
+}
+
+; Fold <-1.0, -1.0> / X > <-0.0, -0.0>
+define <2 x i1> @test27_recipX_gt_vecsplat(<2 x float> %X) {
+; CHECK-LABEL: @test27_recipX_gt_vecsplat(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ninf olt <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %div = fdiv ninf <2 x float> <float -1.0, float -1.0>, %X
+ %cmp = fcmp ninf ogt <2 x float> %div, <float -0.0, float -0.0>
+ ret <2 x i1> %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fdiv-cos-sin.ll b/llvm/test/Transforms/InstCombine/fdiv-cos-sin.ll
new file mode 100644
index 00000000000..3284e1f1b1c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdiv-cos-sin.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
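+; The tests below exercise folding cos(X) / sin(X) --> 1.0 / tan(X); the
+; variants check which fast-math flag placements, extra uses, and types allow
+; or block the fold.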
+define double @fdiv_cos_sin(double %a) {
+; CHECK-LABEL: @fdiv_cos_sin(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.cos.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call double @llvm.cos.f64(double %a)
+ %2 = call double @llvm.sin.f64(double %a)
+ %div = fdiv double %1, %2
+ ret double %div
+}
+
+define double @fdiv_strict_cos_strict_sin_reassoc(double %a) {
+; CHECK-LABEL: @fdiv_strict_cos_strict_sin_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.cos.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call double @llvm.cos.f64(double %a)
+ %2 = call reassoc double @llvm.sin.f64(double %a)
+ %div = fdiv double %1, %2
+ ret double %div
+}
+
+define double @fdiv_reassoc_cos_strict_sin_strict(double %a, i32* dereferenceable(2) %dummy) {
+; CHECK-LABEL: @fdiv_reassoc_cos_strict_sin_strict(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[TAN]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = call double @llvm.cos.f64(double %a)
+ %2 = call double @llvm.sin.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define double @fdiv_reassoc_cos_reassoc_sin_strict(double %a) {
+; CHECK-LABEL: @fdiv_reassoc_cos_reassoc_sin_strict(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[TAN]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = call reassoc double @llvm.cos.f64(double %a)
+ %2 = call double @llvm.sin.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define double @fdiv_cos_sin_reassoc_multiple_uses(double %a) {
+; CHECK-LABEL: @fdiv_cos_sin_reassoc_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc double @llvm.cos.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.sin.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use(double [[TMP2]])
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call reassoc double @llvm.cos.f64(double %a)
+ %2 = call reassoc double @llvm.sin.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ call void @use(double %2)
+ ret double %div
+}
+
+define double @fdiv_cos_sin_reassoc(double %a) {
+; CHECK-LABEL: @fdiv_cos_sin_reassoc(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[TAN]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = call reassoc double @llvm.cos.f64(double %a)
+ %2 = call reassoc double @llvm.sin.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define half @fdiv_cosf16_sinf16_reassoc(half %a) {
+; CHECK-LABEL: @fdiv_cosf16_sinf16_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc half @llvm.cos.f16(half [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc half @llvm.sin.f16(half [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc half [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret half [[DIV]]
+;
+ %1 = call reassoc half @llvm.cos.f16(half %a)
+ %2 = call reassoc half @llvm.sin.f16(half %a)
+ %div = fdiv reassoc half %1, %2
+ ret half %div
+}
+
+define float @fdiv_cosf_sinf_reassoc(float %a) {
+; CHECK-LABEL: @fdiv_cosf_sinf_reassoc(
+; CHECK-NEXT: [[TANF:%.*]] = call reassoc float @tanf(float [[A:%.*]]) #1
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc float 1.000000e+00, [[TANF]]
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = call reassoc float @llvm.cos.f32(float %a)
+ %2 = call reassoc float @llvm.sin.f32(float %a)
+ %div = fdiv reassoc float %1, %2
+ ret float %div
+}
+
+define fp128 @fdiv_cosfp128_sinfp128_reassoc(fp128 %a) {
+; CHECK-LABEL: @fdiv_cosfp128_sinfp128_reassoc(
+; CHECK-NEXT: [[TANL:%.*]] = call reassoc fp128 @tanl(fp128 [[A:%.*]]) #1
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc fp128 0xL00000000000000003FFF000000000000, [[TANL]]
+; CHECK-NEXT: ret fp128 [[TMP1]]
+;
+ %1 = call reassoc fp128 @llvm.cos.fp128(fp128 %a)
+ %2 = call reassoc fp128 @llvm.sin.fp128(fp128 %a)
+ %div = fdiv reassoc fp128 %1, %2
+ ret fp128 %div
+}
+
+declare half @llvm.cos.f16(half) #1
+declare float @llvm.cos.f32(float) #1
+declare double @llvm.cos.f64(double) #1
+declare fp128 @llvm.cos.fp128(fp128) #1
+
+declare half @llvm.sin.f16(half) #1
+declare float @llvm.sin.f32(float) #1
+declare double @llvm.sin.f64(double) #1
+declare fp128 @llvm.sin.fp128(fp128) #1
+
+declare void @use(double)
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll b/llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll
new file mode 100644
index 00000000000..f94e5dd75a1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdiv-sin-cos.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
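+; The tests below exercise folding sin(X) / cos(X) --> tan(X); the variants
+; check which fast-math flag placements and extra uses allow or block the fold.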
+define double @fdiv_sin_cos(double %a) {
+; CHECK-LABEL: @fdiv_sin_cos(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sin.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call double @llvm.sin.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %div = fdiv double %1, %2
+ ret double %div
+}
+
+define double @fdiv_strict_sin_strict_cos_reassoc(double %a) {
+; CHECK-LABEL: @fdiv_strict_sin_strict_cos_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sin.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call double @llvm.sin.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %div = fdiv double %1, %2
+ ret double %div
+}
+
+define double @fdiv_reassoc_sin_strict_cos_strict(double %a, i32* dereferenceable(2) %dummy) {
+; CHECK-LABEL: @fdiv_reassoc_sin_strict_cos_strict(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: ret double [[TAN]]
+;
+ %1 = call double @llvm.sin.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define double @fdiv_reassoc_sin_reassoc_cos_strict(double %a) {
+; CHECK-LABEL: @fdiv_reassoc_sin_reassoc_cos_strict(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: ret double [[TAN]]
+;
+ %1 = call reassoc double @llvm.sin.f64(double %a)
+ %2 = call double @llvm.cos.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define double @fdiv_sin_cos_reassoc_multiple_uses(double %a) {
+; CHECK-LABEL: @fdiv_sin_cos_reassoc_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc double @llvm.sin.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.cos.f64(double [[A]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use(double [[TMP2]])
+; CHECK-NEXT: ret double [[DIV]]
+;
+ %1 = call reassoc double @llvm.sin.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ call void @use(double %2)
+ ret double %div
+}
+
+define double @fdiv_sin_cos_reassoc(double %a) {
+; CHECK-LABEL: @fdiv_sin_cos_reassoc(
+; CHECK-NEXT: [[TAN:%.*]] = call reassoc double @tan(double [[A:%.*]]) #1
+; CHECK-NEXT: ret double [[TAN]]
+;
+ %1 = call reassoc double @llvm.sin.f64(double %a)
+ %2 = call reassoc double @llvm.cos.f64(double %a)
+ %div = fdiv reassoc double %1, %2
+ ret double %div
+}
+
+define float @fdiv_sinf_cosf_reassoc(float %a) {
+; CHECK-LABEL: @fdiv_sinf_cosf_reassoc(
+; CHECK-NEXT: [[TANF:%.*]] = call reassoc float @tanf(float [[A:%.*]]) #1
+; CHECK-NEXT: ret float [[TANF]]
+;
+ %1 = call reassoc float @llvm.sin.f32(float %a)
+ %2 = call reassoc float @llvm.cos.f32(float %a)
+ %div = fdiv reassoc float %1, %2
+ ret float %div
+}
+
+define fp128 @fdiv_sinfp128_cosfp128_reassoc(fp128 %a) {
+; CHECK-LABEL: @fdiv_sinfp128_cosfp128_reassoc(
+; CHECK-NEXT: [[TANL:%.*]] = call reassoc fp128 @tanl(fp128 [[A:%.*]]) #1
+; CHECK-NEXT: ret fp128 [[TANL]]
+;
+ %1 = call reassoc fp128 @llvm.sin.fp128(fp128 %a)
+ %2 = call reassoc fp128 @llvm.cos.fp128(fp128 %a)
+ %div = fdiv reassoc fp128 %1, %2
+ ret fp128 %div
+}
+
+declare double @llvm.sin.f64(double) #1
+declare float @llvm.sin.f32(float) #1
+declare fp128 @llvm.sin.fp128(fp128) #1
+
+declare double @llvm.cos.f64(double) #1
+declare float @llvm.cos.f32(float) #1
+declare fp128 @llvm.cos.fp128(fp128) #1
+
+declare void @use(double)
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
new file mode 100644
index 00000000000..796eef93cdc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
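+; X / C --> X * (1/C) needs no FMF when 1/C is exact; 8.0 is a power of two,
+; so 1/8.0 = 0.125 is exact.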
+define float @exact_inverse(float %x) {
+; CHECK-LABEL: @exact_inverse(
+; CHECK-NEXT: [[DIV:%.*]] = fmul float [[X:%.*]], 1.250000e-01
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv float %x, 8.0
+ ret float %div
+}
+
+; Min normal float = 1.17549435E-38
+
+define float @exact_inverse2(float %x) {
+; CHECK-LABEL: @exact_inverse2(
+; CHECK-NEXT: [[DIV:%.*]] = fmul float [[X:%.*]], 0x47D0000000000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv float %x, 0x3810000000000000
+ ret float %div
+}
+
+; 1.70141183E+38 = 2^127; its reciprocal 2^-127 is a float denormal, so don't transform to a multiply.
+
+define float @exact_inverse_but_denorm(float %x) {
+; CHECK-LABEL: @exact_inverse_but_denorm(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47E0000000000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv float %x, 0x47E0000000000000
+ ret float %div
+}
+
+; Denormal = float 1.40129846E-45; inverse can't be represented.
+
+define float @not_exact_inverse2(float %x) {
+; CHECK-LABEL: @not_exact_inverse2(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], 0x36A0000000000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv float %x, 0x36A0000000000000
+ ret float %div
+}
+
+; Fast math allows us to replace this fdiv.
+
+define float @not_exact_but_allow_recip(float %x) {
+; CHECK-LABEL: @not_exact_but_allow_recip(
+; CHECK-NEXT: [[DIV:%.*]] = fmul arcp float [[X:%.*]], 0x3FD5555560000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv arcp float %x, 3.0
+ ret float %div
+}
+
+; Fast math allows us to replace this fdiv, but we don't, to avoid creating a denormal constant.
+; TODO: What if the function attributes tell us that denormals are flushed?
+
+define float @not_exact_but_allow_recip_but_denorm(float %x) {
+; CHECK-LABEL: @not_exact_but_allow_recip_but_denorm(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv arcp float [[X:%.*]], 0x47E0000100000000
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %div = fdiv arcp float %x, 0x47E0000100000000
+ ret float %div
+}
+
+define <2 x float> @exact_inverse_splat(<2 x float> %x) {
+; CHECK-LABEL: @exact_inverse_splat(
+; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], <float 2.500000e-01, float 2.500000e-01>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv <2 x float> %x, <float 4.0, float 4.0>
+ ret <2 x float> %div
+}
+
+; Fast math allows us to replace this fdiv.
+
+define <2 x float> @not_exact_but_allow_recip_splat(<2 x float> %x) {
+; CHECK-LABEL: @not_exact_but_allow_recip_splat(
+; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], <float 0x3FD5555560000000, float 0x3FD5555560000000>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv arcp <2 x float> %x, <float 3.0, float 3.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @exact_inverse_vec(<2 x float> %x) {
+; CHECK-LABEL: @exact_inverse_vec(
+; CHECK-NEXT: [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], <float 2.500000e-01, float 1.250000e-01>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv <2 x float> %x, <float 4.0, float 8.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @not_exact_inverse_splat(<2 x float> %x) {
+; CHECK-LABEL: @not_exact_inverse_splat(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv <2 x float> %x, <float 3.0, float 3.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @not_exact_inverse_vec(<2 x float> %x) {
+; CHECK-LABEL: @not_exact_inverse_vec(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.000000e+00, float 3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv <2 x float> %x, <float 4.0, float 3.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @not_exact_inverse_vec_arcp(<2 x float> %x) {
+; CHECK-LABEL: @not_exact_inverse_vec_arcp(
+; CHECK-NEXT: [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], <float 2.500000e-01, float 0x3FD5555560000000>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv arcp <2 x float> %x, <float 4.0, float 3.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @not_exact_inverse_vec_arcp_with_undef_elt(<2 x float> %x) {
+; CHECK-LABEL: @not_exact_inverse_vec_arcp_with_undef_elt(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv arcp <2 x float> [[X:%.*]], <float undef, float 3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %div = fdiv arcp <2 x float> %x, <float undef, float 3.0>
+ ret <2 x float> %div
+}
+
+; (X / Y) / Z --> X / (Y * Z)
+
+define float @div_with_div_numerator(float %x, float %y, float %z) {
+; CHECK-LABEL: @div_with_div_numerator(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp float [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[DIV2:%.*]] = fdiv reassoc arcp float [[X:%.*]], [[TMP1]]
+; CHECK-NEXT: ret float [[DIV2]]
+;
+ %div1 = fdiv ninf float %x, %y
+ %div2 = fdiv arcp reassoc float %div1, %z
+ ret float %div2
+}
+
+; Z / (X / Y) --> (Z * Y) / X
+
+define <2 x float> @div_with_div_denominator(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @div_with_div_denominator(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[DIV2:%.*]] = fdiv reassoc arcp <2 x float> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[DIV2]]
+;
+ %div1 = fdiv nnan <2 x float> %x, %y
+ %div2 = fdiv arcp reassoc <2 x float> %z, %div1
+ ret <2 x float> %div2
+}
+
+; Don't create an extra multiply if we can't eliminate the first div.
+
+declare void @use_f32(float)
+
+define float @div_with_div_numerator_extra_use(float %x, float %y, float %z) {
+; CHECK-LABEL: @div_with_div_numerator_extra_use(
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[DIV2:%.*]] = fdiv fast float [[DIV1]], [[Z:%.*]]
+; CHECK-NEXT: call void @use_f32(float [[DIV1]])
+; CHECK-NEXT: ret float [[DIV2]]
+;
+ %div1 = fdiv float %x, %y
+ %div2 = fdiv fast float %div1, %z
+ call void @use_f32(float %div1)
+ ret float %div2
+}
+
+define float @div_with_div_denominator_extra_use(float %x, float %y, float %z) {
+; CHECK-LABEL: @div_with_div_denominator_extra_use(
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[DIV2:%.*]] = fdiv fast float [[Z:%.*]], [[DIV1]]
+; CHECK-NEXT: call void @use_f32(float [[DIV1]])
+; CHECK-NEXT: ret float [[DIV2]]
+;
+ %div1 = fdiv float %x, %y
+ %div2 = fdiv fast float %z, %div1
+ call void @use_f32(float %div1)
+ ret float %div2
+}
+
+define float @fneg_fneg(float %x, float %y) {
+; CHECK-LABEL: @fneg_fneg(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %div = fdiv float %x.fneg, %y.fneg
+ ret float %div
+}
+
+; The test above shows that no FMF are needed; this one verifies that FMF are not dropped.
+
+define float @fneg_fneg_fast(float %x, float %y) {
+; CHECK-LABEL: @fneg_fneg_fast(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %div = fdiv fast float %x.fneg, %y.fneg
+ ret float %div
+}
+
+define <2 x float> @fneg_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_fneg_vec(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %xneg = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %yneg = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %div = fdiv <2 x float> %xneg, %yneg
+ ret <2 x float> %div
+}
+
+define <2 x float> @fneg_fneg_vec_undef_elts(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_fneg_vec_undef_elts(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %xneg = fsub <2 x float> <float undef, float -0.0>, %x
+ %yneg = fsub <2 x float> <float -0.0, float undef>, %y
+ %div = fdiv <2 x float> %xneg, %yneg
+ ret <2 x float> %div
+}
+
+define float @fneg_dividend_constant_divisor(float %x) {
+; CHECK-LABEL: @fneg_dividend_constant_divisor(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv nsz float [[X:%.*]], -3.000000e+00
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %neg = fsub float -0.0, %x
+ %div = fdiv nsz float %neg, 3.0
+ ret float %div
+}
+
+define float @fneg_divisor_constant_dividend(float %x) {
+; CHECK-LABEL: @fneg_divisor_constant_dividend(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv nnan float 3.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[DIV]]
+;
+ %neg = fsub float -0.0, %x
+ %div = fdiv nnan float -3.0, %neg
+ ret float %div
+}
+
+define <2 x float> @fneg_dividend_constant_divisor_vec(<2 x float> %x) {
+; CHECK-LABEL: @fneg_dividend_constant_divisor_vec(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %neg = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %div = fdiv ninf <2 x float> %neg, <float 3.0, float -8.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @fneg_dividend_constant_divisor_vec_undef_elt(<2 x float> %x) {
+; CHECK-LABEL: @fneg_dividend_constant_divisor_vec_undef_elt(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %neg = fsub <2 x float> <float undef, float -0.0>, %x
+ %div = fdiv ninf <2 x float> %neg, <float 3.0, float -8.0>
+ ret <2 x float> %div
+}
+
+define <2 x float> @fneg_divisor_constant_dividend_vec(<2 x float> %x) {
+; CHECK-LABEL: @fneg_divisor_constant_dividend_vec(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv afn <2 x float> <float 3.000000e+00, float -5.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[DIV]]
+;
+ %neg = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %div = fdiv afn <2 x float> <float -3.0, float 5.0>, %neg
+ ret <2 x float> %div
+}
+
+; X / (X * Y) --> 1.0 / Y
+
+define float @div_factor(float %x, float %y) {
+; CHECK-LABEL: @div_factor(
+; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan float 1.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: ret float [[D]]
+;
+ %m = fmul float %x, %y
+ %d = fdiv nnan reassoc float %x, %m
+ ret float %d
+}
+
+; We can't do the transform without 'nnan' because if x is NaN and y is a number, this should return NaN.
+
+define float @div_factor_too_strict(float %x, float %y) {
+; CHECK-LABEL: @div_factor_too_strict(
+; CHECK-NEXT: [[M:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[D:%.*]] = fdiv reassoc float [[X]], [[M]]
+; CHECK-NEXT: ret float [[D]]
+;
+ %m = fmul float %x, %y
+ %d = fdiv reassoc float %x, %m
+ ret float %d
+}
+
+; Commute, verify vector types, and show that we are not dropping extra FMF.
+; X / (Y * X) --> 1.0 / Y
+
+define <2 x float> @div_factor_commute(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @div_factor_commute(
+; CHECK-NEXT: [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[D]]
+;
+ %m = fmul <2 x float> %y, %x
+ %d = fdiv nnan ninf nsz reassoc <2 x float> %x, %m
+ ret <2 x float> %d
+}
+
+; C1/(X*C2) => (C1/C2) / X
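+; e.g., in @div_constant_dividend1 below, C1 = <15.0, 7.0> and C2 = <3.0, 7.0>, so the dividend folds to <5.0, 1.0>.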
+
+define <2 x float> @div_constant_dividend1(<2 x float> %x) {
+; CHECK-LABEL: @div_constant_dividend1(
+; CHECK-NEXT: [[T2:%.*]] = fdiv reassoc arcp <2 x float> <float 5.000000e+00, float 1.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[T2]]
+;
+ %t1 = fmul <2 x float> %x, <float 3.0e0, float 7.0e0>
+ %t2 = fdiv arcp reassoc <2 x float> <float 15.0e0, float 7.0e0>, %t1
+ ret <2 x float> %t2
+}
+
+define <2 x float> @div_constant_dividend1_arcp_only(<2 x float> %x) {
+; CHECK-LABEL: @div_constant_dividend1_arcp_only(
+; CHECK-NEXT: [[T1:%.*]] = fmul <2 x float> [[X:%.*]], <float 3.000000e+00, float 7.000000e+00>
+; CHECK-NEXT: [[T2:%.*]] = fdiv arcp <2 x float> <float 1.500000e+01, float 7.000000e+00>, [[T1]]
+; CHECK-NEXT: ret <2 x float> [[T2]]
+;
+ %t1 = fmul <2 x float> %x, <float 3.0e0, float 7.0e0>
+ %t2 = fdiv arcp <2 x float> <float 15.0e0, float 7.0e0>, %t1
+ ret <2 x float> %t2
+}
+
+; C1/(X/C2) => (C1*C2) / X
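+; e.g., in @div_constant_dividend2 below, C1 = <15.0, -7.0> and C2 = <3.0, -7.0>, so the dividend folds to <45.0, 49.0>.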
+
+define <2 x float> @div_constant_dividend2(<2 x float> %x) {
+; CHECK-LABEL: @div_constant_dividend2(
+; CHECK-NEXT: [[T2:%.*]] = fdiv reassoc arcp <2 x float> <float 4.500000e+01, float 4.900000e+01>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x float> [[T2]]
+;
+ %t1 = fdiv <2 x float> %x, <float 3.0e0, float -7.0e0>
+ %t2 = fdiv arcp reassoc <2 x float> <float 15.0e0, float -7.0e0>, %t1
+ ret <2 x float> %t2
+}
+
+define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
+; CHECK-LABEL: @div_constant_dividend2_reassoc_only(
+; CHECK-NEXT: [[T1:%.*]] = fdiv <2 x float> [[X:%.*]], <float 3.000000e+00, float -7.000000e+00>
+; CHECK-NEXT: [[T2:%.*]] = fdiv reassoc <2 x float> <float 1.500000e+01, float -7.000000e+00>, [[T1]]
+; CHECK-NEXT: ret <2 x float> [[T2]]
+;
+ %t1 = fdiv <2 x float> %x, <float 3.0e0, float -7.0e0>
+ %t2 = fdiv reassoc <2 x float> <float 15.0e0, float -7.0e0>, %t1
+ ret <2 x float> %t2
+}
+
+; C1/(C2/X) => (C1/C2) * X
+; This tests the combination of 2 folds: (C1 * X) / C2 --> (C1 / C2) * X
+
+define <2 x float> @div_constant_dividend3(<2 x float> %x) {
+; CHECK-LABEL: @div_constant_dividend3(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
+; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT: ret <2 x float> [[T2]]
+;
+ %t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
+ %t2 = fdiv arcp reassoc <2 x float> <float 15.0e0, float -7.0e0>, %t1
+ ret <2 x float> %t2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/ffs-1.ll b/llvm/test/Transforms/InstCombine/ffs-1.ll
new file mode 100644
index 00000000000..5be47efb178
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ffs-1.ll
@@ -0,0 +1,193 @@
+; Test that the ffs* library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
+; RUN: opt < %s -instcombine -mtriple i386-pc-linux -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=arm64-apple-ios9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=arm64-apple-tvos9.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx10.11 -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
+; RUN: opt < %s -instcombine -mtriple=x86_64-freebsd-gnu -S | FileCheck %s --check-prefix=ALL --check-prefix=TARGET
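+; Note: the GENERIC run leaves ffsl/ffsll untouched, presumably because they are only treated as known
+; library calls for targets like the triples listed above; ffs itself is simplified on all targets.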
+
+declare i32 @ffs(i32)
+declare i32 @ffsl(i32)
+declare i32 @ffsll(i64)
+
+; Check ffs(0) -> 0.
+
+define i32 @test_simplify1() {
+; ALL-LABEL: @test_simplify1(
+; ALL-NEXT: ret i32 0
+;
+ %ret = call i32 @ffs(i32 0)
+ ret i32 %ret
+}
+
+define i32 @test_simplify2() {
+; GENERIC-LABEL: @test_simplify2(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 0)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify2(
+; TARGET-NEXT: ret i32 0
+;
+ %ret = call i32 @ffsl(i32 0)
+ ret i32 %ret
+}
+
+define i32 @test_simplify3() {
+; GENERIC-LABEL: @test_simplify3(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 0)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify3(
+; TARGET-NEXT: ret i32 0
+;
+ %ret = call i32 @ffsll(i64 0)
+ ret i32 %ret
+}
+
+; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
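+; e.g., 2048 == 1 << 11, so cttz(2048) == 11 and ffs(2048) folds to 12 (test_simplify5).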
+
+define i32 @test_simplify4() {
+; ALL-LABEL: @test_simplify4(
+; ALL-NEXT: ret i32 1
+;
+ %ret = call i32 @ffs(i32 1)
+ ret i32 %ret
+}
+
+define i32 @test_simplify5() {
+; ALL-LABEL: @test_simplify5(
+; ALL-NEXT: ret i32 12
+;
+ %ret = call i32 @ffs(i32 2048)
+ ret i32 %ret
+}
+
+define i32 @test_simplify6() {
+; ALL-LABEL: @test_simplify6(
+; ALL-NEXT: ret i32 17
+;
+ %ret = call i32 @ffs(i32 65536)
+ ret i32 %ret
+}
+
+define i32 @test_simplify7() {
+; GENERIC-LABEL: @test_simplify7(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 65536)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify7(
+; TARGET-NEXT: ret i32 17
+;
+ %ret = call i32 @ffsl(i32 65536)
+ ret i32 %ret
+}
+
+define i32 @test_simplify8() {
+; GENERIC-LABEL: @test_simplify8(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1024)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify8(
+; TARGET-NEXT: ret i32 11
+;
+ %ret = call i32 @ffsll(i64 1024)
+ ret i32 %ret
+}
+
+define i32 @test_simplify9() {
+; GENERIC-LABEL: @test_simplify9(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 65536)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify9(
+; TARGET-NEXT: ret i32 17
+;
+ %ret = call i32 @ffsll(i64 65536)
+ ret i32 %ret
+}
+
+define i32 @test_simplify10() {
+; GENERIC-LABEL: @test_simplify10(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 17179869184)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify10(
+; TARGET-NEXT: ret i32 35
+;
+ %ret = call i32 @ffsll(i64 17179869184)
+ ret i32 %ret
+}
+
+define i32 @test_simplify11() {
+; GENERIC-LABEL: @test_simplify11(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 281474976710656)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify11(
+; TARGET-NEXT: ret i32 49
+;
+ %ret = call i32 @ffsll(i64 281474976710656)
+ ret i32 %ret
+}
+
+define i32 @test_simplify12() {
+; GENERIC-LABEL: @test_simplify12(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 1152921504606846976)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify12(
+; TARGET-NEXT: ret i32 61
+;
+ %ret = call i32 @ffsll(i64 1152921504606846976)
+ ret i32 %ret
+}
+
+; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
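+; The select already returns 0 when x == 0, so cttz can use the 'i1 true' (undefined-on-zero) form.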
+
+define i32 @test_simplify13(i32 %x) {
+; ALL-LABEL: @test_simplify13(
+; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
+; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
+; ALL-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
+; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
+; ALL-NEXT: ret i32 [[TMP3]]
+;
+ %ret = call i32 @ffs(i32 %x)
+ ret i32 %ret
+}
+
+define i32 @test_simplify14(i32 %x) {
+; GENERIC-LABEL: @test_simplify14(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsl(i32 %x)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify14(
+; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
+; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
+; TARGET-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
+; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
+; TARGET-NEXT: ret i32 [[TMP3]]
+;
+ %ret = call i32 @ffsl(i32 %x)
+ ret i32 %ret
+}
+
+define i32 @test_simplify15(i64 %x) {
+; GENERIC-LABEL: @test_simplify15(
+; GENERIC-NEXT: [[RET:%.*]] = call i32 @ffsll(i64 %x)
+; GENERIC-NEXT: ret i32 [[RET]]
+;
+; TARGET-LABEL: @test_simplify15(
+; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true), !range !1
+; TARGET-NEXT: [[TMP1:%.*]] = trunc i64 [[CTTZ]] to i32
+; TARGET-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0
+; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; TARGET-NEXT: ret i32 [[TMP4]]
+;
+ %ret = call i32 @ffsll(i64 %x)
+ ret i32 %ret
+}
+
diff --git a/llvm/test/Transforms/InstCombine/float-shrink-compare.ll b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll
new file mode 100644
index 00000000000..2cf4df54254
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -0,0 +1,473 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
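+
+; These tests check that a double-precision libm call (or the equivalent intrinsic) whose operand is an
+; fpext'ed float, and whose result is only compared against another fpext'ed float, is shrunk to the
+; single-precision form so the comparison can be done on float.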
+
+define i1 @test1(float %x, float %y) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[CEIL]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %ceil = call double @ceil(double %x.ext) nounwind readnone
+ %ext.y = fpext float %y to double
+ %cmp = fcmp oeq double %ceil, %ext.y
+ ret i1 %cmp
+}
+
+define i1 @test1_intrin(float %x, float %y) {
+; CHECK-LABEL: @test1_intrin(
+; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[CEIL]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %ceil = call double @llvm.ceil.f64(double %x.ext) nounwind readnone
+ %ext.y = fpext float %y to double
+ %cmp = fcmp oeq double %ceil, %ext.y
+ ret i1 %cmp
+}
+
+define i1 @test2(float %x, float %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FABS]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %fabs = call double @fabs(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %fabs, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test2_intrin(float %x, float %y) {
+; CHECK-LABEL: @test2_intrin(
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FABS]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %fabs = call double @llvm.fabs.f64(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %fabs, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @fmf_test2(float %x, float %y) {
+; CHECK-LABEL: @fmf_test2(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], %y
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = fpext float %x to double
+ %2 = call nnan double @fabs(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ ret i1 %4
+}
+
+define i1 @test3(float %x, float %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FLOOR]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %floor = call double @floor(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %floor, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test3_intrin(float %x, float %y) {
+; CHECK-LABEL: @test3_intrin(
+; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FLOOR]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %floor = call double @llvm.floor.f64(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %floor, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test4(float %x, float %y) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[NEARBYINT:%.*]] = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[NEARBYINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %nearbyint = call double @nearbyint(double %x.ext) nounwind
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %nearbyint, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @shrink_nearbyint_intrin(float %x, float %y) {
+; CHECK-LABEL: @shrink_nearbyint_intrin(
+; CHECK-NEXT: [[NEARBYINT:%.*]] = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[NEARBYINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %nearbyint = call double @llvm.nearbyint.f64(double %x.ext) nounwind
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %nearbyint, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test5(float %x, float %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[RINT:%.*]] = call float @llvm.rint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[RINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %rint = call double @rint(double %x.ext) nounwind
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %rint, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test6(float %x, float %y) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %round = call double @round(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %round, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test6_intrin(float %x, float %y) {
+; CHECK-LABEL: @test6_intrin(
+; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %round = call double @llvm.round.f64(double %x.ext) nounwind readnone
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %round, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test7(float %x, float %y) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TRUNC]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %trunc = call double @trunc(double %x.ext) nounwind
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %trunc, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test7_intrin(float %x, float %y) {
+; CHECK-LABEL: @test7_intrin(
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TRUNC]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %trunc = call double @llvm.trunc.f64(double %x.ext) nounwind
+ %y.ext = fpext float %y to double
+ %cmp = fcmp oeq double %trunc, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test8(float %x, float %y) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[CEIL]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %ceil = call double @ceil(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %ceil
+ ret i1 %cmp
+}
+
+define i1 @test8_intrin(float %x, float %y) {
+; CHECK-LABEL: @test8_intrin(
+; CHECK-NEXT: [[CEIL:%.*]] = call float @llvm.ceil.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[CEIL]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %ceil = call double @llvm.ceil.f64(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %ceil
+ ret i1 %cmp
+}
+
+define i1 @test9(float %x, float %y) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FABS]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %fabs = call double @fabs(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %fabs
+ ret i1 %cmp
+}
+
+define i1 @test9_intrin(float %x, float %y) {
+; CHECK-LABEL: @test9_intrin(
+; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FABS]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %fabs = call double @llvm.fabs.f64(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %fabs
+ ret i1 %cmp
+}
+
+define i1 @test10(float %x, float %y) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FLOOR]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %floor = call double @floor(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %floor, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test10_intrin(float %x, float %y) {
+; CHECK-LABEL: @test10_intrin(
+; CHECK-NEXT: [[FLOOR:%.*]] = call float @llvm.floor.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[FLOOR]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %floor = call double @llvm.floor.f64(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %floor, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test11(float %x, float %y) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[NEARBYINT:%.*]] = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[NEARBYINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %nearbyint = call double @nearbyint(double %x.ext) nounwind
+ %cmp = fcmp oeq double %nearbyint, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test11_intrin(float %x, float %y) {
+; CHECK-LABEL: @test11_intrin(
+; CHECK-NEXT: [[NEARBYINT:%.*]] = call float @llvm.nearbyint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[NEARBYINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %nearbyint = call double @llvm.nearbyint.f64(double %x.ext) nounwind
+ %cmp = fcmp oeq double %nearbyint, %y.ext
+ ret i1 %cmp
+}
+
+define i1 @test12(float %x, float %y) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[RINT:%.*]] = call float @llvm.rint.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[RINT]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %rint = call double @rint(double %x.ext) nounwind
+ %cmp = fcmp oeq double %y.ext, %rint
+ ret i1 %cmp
+}
+
+define i1 @test13(float %x, float %y) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %round = call double @round(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %round
+ ret i1 %cmp
+}
+
+define i1 @test13_intrin(float %x, float %y) {
+; CHECK-LABEL: @test13_intrin(
+; CHECK-NEXT: [[ROUND:%.*]] = call float @llvm.round.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ROUND]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %round = call double @llvm.round.f64(double %x.ext) nounwind readnone
+ %cmp = fcmp oeq double %y.ext, %round
+ ret i1 %cmp
+}
+
+define i1 @test14(float %x, float %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TRUNC]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %trunc = call double @trunc(double %x.ext) nounwind
+ %cmp = fcmp oeq double %y.ext, %trunc
+ ret i1 %cmp
+}
+
+define i1 @test14_intrin(float %x, float %y) {
+; CHECK-LABEL: @test14_intrin(
+; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.trunc.f32(float %x)
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[TRUNC]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.ext = fpext float %x to double
+ %y.ext = fpext float %y to double
+ %trunc = call double @llvm.trunc.f64(double %x.ext) nounwind
+ %cmp = fcmp oeq double %y.ext, %trunc
+ ret i1 %cmp
+}
+
+define i1 @test15(float %x, float %y, float %z) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fmin(double %1, double %2) nounwind
+ %4 = fpext float %z to double
+ %5 = fcmp oeq double %3, %4
+ ret i1 %5
+}
+
+define i1 @test16(float %x, float %y, float %z) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %z to double
+ %2 = fpext float %x to double
+ %3 = fpext float %y to double
+ %4 = call double @fmin(double %2, double %3) nounwind
+ %5 = fcmp oeq double %1, %4
+ ret i1 %5
+}
+
+define i1 @test17(float %x, float %y, float %z) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fmax(double %1, double %2) nounwind
+ %4 = fpext float %z to double
+ %5 = fcmp oeq double %3, %4
+ ret i1 %5
+}
+
+define i1 @test18(float %x, float %y, float %z) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %z to double
+ %2 = fpext float %x to double
+ %3 = fpext float %y to double
+ %4 = call double @fmax(double %2, double %3) nounwind
+ %5 = fcmp oeq double %1, %4
+ ret i1 %5
+}
+
+define i1 @test19(float %x, float %y, float %z) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[COPYSIGNF:%.*]] = call float @copysignf(float %x, float %y) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[COPYSIGNF]], %z
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @copysign(double %1, double %2) nounwind
+ %4 = fpext float %z to double
+ %5 = fcmp oeq double %3, %4
+ ret i1 %5
+}
+
+define i1 @test20(float %x, float %y) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float 1.000000e+00, float %x) #0
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %y
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @fmin(double 1.000000e+00, double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ ret i1 %4
+}
+
+; This should not be changed to fminf because the constant 1.3 is not exactly representable as a float,
+; so it would lose precision.
+
+define i1 @test21(float %x, float %y) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP1:%.*]] = fpext float %y to double
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float %x to double
+; CHECK-NEXT: [[TMP3:%.*]] = call double @fmin(double 1.300000e+00, double [[TMP2]]) #2
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq double [[TMP3]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @fmin(double 1.300000e+00, double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ ret i1 %4
+}
+
+declare double @fabs(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @copysign(double, double) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @nearbyint(double) nounwind readnone
+declare double @rint(double) nounwind readnone
+declare double @round(double) nounwind readnone
+declare double @trunc(double) nounwind readnone
+declare double @fmin(double, double) nounwind readnone
+declare double @fmax(double, double) nounwind readnone
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.ceil.f64(double) nounwind readnone
+declare double @llvm.floor.f64(double) nounwind readnone
+declare double @llvm.nearbyint.f64(double) nounwind readnone
+declare double @llvm.round.f64(double) nounwind readnone
+declare double @llvm.trunc.f64(double) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/fls.ll b/llvm/test/Transforms/InstCombine/fls.ll
new file mode 100644
index 00000000000..bf87e6a3110
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fls.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "x86_64-unknown-freebsd11.0"
+
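+; fls(42) --> 6: the most significant set bit of 42 (0b101010) is bit 6, counting from 1.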
+define i32 @myfls() {
+; CHECK-LABEL: @myfls(
+; CHECK-NEXT: ret i32 6
+;
+ %call = call i32 @fls(i32 42)
+ ret i32 %call
+}
+
+define i32 @myflsl() {
+; CHECK-LABEL: @myflsl(
+; CHECK-NEXT: ret i32 6
+;
+ %patatino = call i32 @flsl(i64 42)
+ ret i32 %patatino
+}
+
+define i32 @myflsll() {
+; CHECK-LABEL: @myflsll(
+; CHECK-NEXT: ret i32 6
+;
+ %whatever = call i32 @flsll(i64 42)
+ ret i32 %whatever
+}
+
+; Lower to llvm.ctlz() if the argument is not a constant
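+; For a 64-bit argument, flsl(z) == 64 - ctlz(z); ctlz(0) is defined as 64 here, so fls(0) still folds to 0.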
+
+define i32 @flsnotconst(i64 %z) {
+; CHECK-LABEL: @flsnotconst(
+; CHECK-NEXT: [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 %z, i1 false), !range !0
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[CTLZ]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 64, [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %goo = call i32 @flsl(i64 %z)
+ ret i32 %goo
+}
+
+; Make sure we lower fls(0) to 0 and not to `undef`.
+
+define i32 @flszero() {
+; CHECK-LABEL: @flszero(
+; CHECK-NEXT: ret i32 0
+;
+ %zero = call i32 @fls(i32 0)
+ ret i32 %zero
+}
+
+declare i32 @fls(i32)
+declare i32 @flsl(i64)
+declare i32 @flsll(i64)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
new file mode 100644
index 00000000000..7bb6619b4fd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -0,0 +1,277 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.fabs.f32(float) #1
+
+@external = external global i32
+
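+; fma(-x, -y, z) --> fma(x, y, z): negating both multiplicands does not change their product.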
+define float @fma_fneg_x_fneg_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fma_fneg_x_fneg_y(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %fma = call float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+ ret float %fma
+}
+
+define <2 x float> @fma_fneg_x_fneg_y_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fma_fneg_x_fneg_y_vec(
+; CHECK-NEXT: [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]])
+; CHECK-NEXT: ret <2 x float> [[FMA]]
+;
+ %xn = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %yn = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %xn, <2 x float> %yn, <2 x float> %z)
+ ret <2 x float> %fma
+}
+
+define <2 x float> @fma_fneg_x_fneg_y_vec_undef(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fma_fneg_x_fneg_y_vec_undef(
+; CHECK-NEXT: [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]])
+; CHECK-NEXT: ret <2 x float> [[FMA]]
+;
+ %xn = fsub <2 x float> <float -0.0, float undef>, %x
+ %yn = fsub <2 x float> <float undef, float -0.0>, %y
+ %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %xn, <2 x float> %yn, <2 x float> %z)
+ ret <2 x float> %fma
+}
+
+define float @fma_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+; CHECK-LABEL: @fma_fneg_x_fneg_y_fast(
+; CHECK-NEXT: [[FMA:%.*]] = call fast float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %fma = call fast float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+ ret float %fma
+}
+
+define float @fma_fneg_const_fneg_y(float %y, float %z) {
+; CHECK-LABEL: @fma_fneg_const_fneg_y(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %y.fneg = fsub float -0.0, %y
+ %fma = call float @llvm.fma.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+ ret float %fma
+}
+
+define float @fma_fneg_x_fneg_const(float %x, float %z) {
+; CHECK-LABEL: @fma_fneg_x_fneg_const(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %fma = call float @llvm.fma.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+ ret float %fma
+}
+
+define float @fma_fabs_x_fabs_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fma_fabs_x_fabs_y(
+; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[X_FABS]], float [[Y_FABS]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %y.fabs = call float @llvm.fabs.f32(float %y)
+ %fma = call float @llvm.fma.f32(float %x.fabs, float %y.fabs, float %z)
+ ret float %fma
+}
+
+define float @fma_fabs_x_fabs_x(float %x, float %z) {
+; CHECK-LABEL: @fma_fabs_x_fabs_x(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[X]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %fma = call float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+ ret float %fma
+}
+
+define float @fma_fabs_x_fabs_x_fast(float %x, float %z) {
+; CHECK-LABEL: @fma_fabs_x_fabs_x_fast(
+; CHECK-NEXT: [[FMA:%.*]] = call fast float @llvm.fma.f32(float [[X:%.*]], float [[X]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %fma = call fast float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+ ret float %fma
+}
+
+define float @fmuladd_fneg_x_fneg_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y(
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %y.fneg = fsub float -0.0, %y
+ %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
+; CHECK-LABEL: @fmuladd_fneg_const_fneg_y(
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %y.fneg = fsub float -0.0, %y
+ %fmuladd = call float @llvm.fmuladd.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fneg_x_fneg_const(float %x, float %z) {
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_const(
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fneg = fsub float -0.0, %x
+ %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fabs_x_fabs_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_y(
+; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_FABS]], float [[Y_FABS]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %y.fabs = call float @llvm.fabs.f32(float %y)
+ %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %y.fabs, float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fabs_x_fabs_x(float %x, float %z) {
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x(
+; CHECK-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[X]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+ ret float %fmuladd
+}
+
+define float @fmuladd_fabs_x_fabs_x_fast(float %x, float %z) {
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+ ret float %fmuladd
+}
+
+define float @fma_k_y_z(float %y, float %z) {
+; CHECK-LABEL: @fma_k_y_z(
+; CHECK-NEXT: [[FMA:%.*]] = call float @llvm.fma.f32(float [[Y:%.*]], float 4.000000e+00, float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call float @llvm.fma.f32(float 4.0, float %y, float %z)
+ ret float %fma
+}
+
+define float @fma_k_y_z_fast(float %y, float %z) {
+; CHECK-LABEL: @fma_k_y_z_fast(
+; CHECK-NEXT: [[FMA:%.*]] = call fast float @llvm.fma.f32(float [[Y:%.*]], float 4.000000e+00, float [[Z:%.*]])
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call fast float @llvm.fma.f32(float 4.0, float %y, float %z)
+ ret float %fma
+}
+
+define float @fmuladd_k_y_z_fast(float %y, float %z) {
+; CHECK-LABEL: @fmuladd_k_y_z_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[Y:%.*]], 4.000000e+00
+; CHECK-NEXT: [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %fmuladd = call fast float @llvm.fmuladd.f32(float 4.0, float %y, float %z)
+ ret float %fmuladd
+}
+
+define float @fma_1_y_z(float %y, float %z) {
+; CHECK-LABEL: @fma_1_y_z(
+; CHECK-NEXT: [[FMA:%.*]] = fadd float [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call float @llvm.fma.f32(float 1.0, float %y, float %z)
+ ret float %fma
+}
+
+define float @fma_x_1_z(float %x, float %z) {
+; CHECK-LABEL: @fma_x_1_z(
+; CHECK-NEXT: [[FMA:%.*]] = fadd float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call float @llvm.fma.f32(float %x, float 1.0, float %z)
+ ret float %fma
+}
+
+define <2 x float> @fma_x_1_z_v2f32(<2 x float> %x, <2 x float> %z) {
+; CHECK-LABEL: @fma_x_1_z_v2f32(
+; CHECK-NEXT: [[FMA:%.*]] = fadd <2 x float> [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x float> [[FMA]]
+;
+ %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 1.0>, <2 x float> %z)
+ ret <2 x float> %fma
+}
+
+define <2 x float> @fma_x_1_2_z_v2f32(<2 x float> %x, <2 x float> %z) {
+; CHECK-LABEL: @fma_x_1_2_z_v2f32(
+; CHECK-NEXT: [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[Z:%.*]])
+; CHECK-NEXT: ret <2 x float> [[FMA]]
+;
+ %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 2.0>, <2 x float> %z)
+ ret <2 x float> %fma
+}
+
+define float @fma_x_1_z_fast(float %x, float %z) {
+; CHECK-LABEL: @fma_x_1_z_fast(
+; CHECK-NEXT: [[FMA:%.*]] = fadd fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call fast float @llvm.fma.f32(float %x, float 1.0, float %z)
+ ret float %fma
+}
+
+define float @fma_1_1_z(float %z) {
+; CHECK-LABEL: @fma_1_1_z(
+; CHECK-NEXT: [[FMA:%.*]] = fadd float [[Z:%.*]], 1.000000e+00
+; CHECK-NEXT: ret float [[FMA]]
+;
+ %fma = call float @llvm.fma.f32(float 1.0, float 1.0, float %z)
+ ret float %fma
+}
+
+define float @fmuladd_x_1_z_fast(float %x, float %z) {
+; CHECK-LABEL: @fmuladd_x_1_z_fast(
+; CHECK-NEXT: [[FMULADD:%.*]] = fadd fast float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[FMULADD]]
+;
+ %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float 1.0, float %z)
+ ret float %fmuladd
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/fmul-exp.ll b/llvm/test/Transforms/InstCombine/fmul-exp.ll
new file mode 100644
index 00000000000..28542f45294
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-exp.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare double @llvm.exp.f64(double) nounwind readnone speculatable
+declare void @use(double)
+
+; exp(a) * exp(b) no reassoc flags
+define double @exp_a_exp_b(double %a, double %b) {
+; CHECK-LABEL: @exp_a_exp_b(
+; CHECK-NEXT: [[TMP:%.*]] = call double @llvm.exp.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %tmp = call double @llvm.exp.f64(double %a)
+ %tmp1 = call double @llvm.exp.f64(double %b)
+ %mul = fmul double %tmp, %tmp1
+ ret double %mul
+}
+
+; exp(a) * exp(b) reassoc, multiple uses
+define double @exp_a_exp_b_multiple_uses(double %a, double %b) {
+; CHECK-LABEL: @exp_a_exp_b_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp.f64(double [[B:%.*]])
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B]]
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.exp.f64(double [[TMP]])
+; CHECK-NEXT: call void @use(double [[TMP1]])
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %tmp = call double @llvm.exp.f64(double %a)
+ %tmp1 = call double @llvm.exp.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ call void @use(double %tmp1)
+ ret double %mul
+}
+
+; exp(a) * exp(b) reassoc, both with multiple uses
+define double @exp_a_exp_b_multiple_uses_both(double %a, double %b) {
+; CHECK-LABEL: @exp_a_exp_b_multiple_uses_both(
+; CHECK-NEXT: [[TMP:%.*]] = call double @llvm.exp.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP]], [[TMP1]]
+; CHECK-NEXT: call void @use(double [[TMP]])
+; CHECK-NEXT: call void @use(double [[TMP1]])
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %tmp = call double @llvm.exp.f64(double %a)
+ %tmp1 = call double @llvm.exp.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ call void @use(double %tmp)
+ call void @use(double %tmp1)
+ ret double %mul
+}
+
+; exp(a) * exp(b) => exp(a+b) with reassoc
+define double @exp_a_exp_b_reassoc(double %a, double %b) {
+; CHECK-LABEL: @exp_a_exp_b_reassoc(
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc double @llvm.exp.f64(double [[TMP]])
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %tmp = call double @llvm.exp.f64(double %a)
+ %tmp1 = call double @llvm.exp.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ ret double %mul
+}
+
+; exp(a) * exp(b) * exp(c) * exp(d) => exp(a+b+c+d) with reassoc
+define double @exp_a_exp_b_exp_c_exp_d_fast(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @exp_a_exp_b_exp_c_exp_d_fast(
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc double [[TMP]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[D:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call reassoc double @llvm.exp.f64(double [[TMP2]])
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %tmp = call double @llvm.exp.f64(double %a)
+ %tmp1 = call double @llvm.exp.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ %tmp2 = call double @llvm.exp.f64(double %c)
+ %mul1 = fmul reassoc double %mul, %tmp2
+ %tmp3 = call double @llvm.exp.f64(double %d)
+ %mul2 = fmul reassoc double %mul1, %tmp3
+ ret double %mul2
+}
diff --git a/llvm/test/Transforms/InstCombine/fmul-exp2.ll b/llvm/test/Transforms/InstCombine/fmul-exp2.ll
new file mode 100644
index 00000000000..f09013847bb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-exp2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare double @llvm.exp2.f64(double) nounwind readnone speculatable
+declare void @use(double)
+
+; exp2(a) * exp2(b) no reassoc flags
+define double @exp2_a_exp2_b(double %a, double %b) {
+; CHECK-LABEL: @exp2_a_exp2_b(
+; CHECK-NEXT: [[TMP:%.*]] = call double @llvm.exp2.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp2.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %tmp = call double @llvm.exp2.f64(double %a)
+ %tmp1 = call double @llvm.exp2.f64(double %b)
+ %mul = fmul double %tmp, %tmp1
+ ret double %mul
+}
+
+; exp2(a) * exp2(b) reassoc, multiple uses
+define double @exp2_a_exp2_b_multiple_uses(double %a, double %b) {
+; CHECK-LABEL: @exp2_a_exp2_b_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp2.f64(double [[B:%.*]])
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B]]
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc double @llvm.exp2.f64(double [[TMP]])
+; CHECK-NEXT: call void @use(double [[TMP1]])
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %tmp = call double @llvm.exp2.f64(double %a)
+ %tmp1 = call double @llvm.exp2.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ call void @use(double %tmp1)
+ ret double %mul
+}
+
+; exp2(a) * exp2(b) reassoc, both with multiple uses
+define double @exp2_a_exp2_b_multiple_uses_both(double %a, double %b) {
+; CHECK-LABEL: @exp2_a_exp2_b_multiple_uses_both(
+; CHECK-NEXT: [[TMP:%.*]] = call double @llvm.exp2.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.exp2.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP]], [[TMP1]]
+; CHECK-NEXT: call void @use(double [[TMP]])
+; CHECK-NEXT: call void @use(double [[TMP1]])
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %tmp = call double @llvm.exp2.f64(double %a)
+ %tmp1 = call double @llvm.exp2.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ call void @use(double %tmp)
+ call void @use(double %tmp1)
+ ret double %mul
+}
+
+; exp2(a) * exp2(b) => exp2(a+b) with reassoc
+define double @exp2_a_exp2_b_reassoc(double %a, double %b) {
+; CHECK-LABEL: @exp2_a_exp2_b_reassoc(
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc double @llvm.exp2.f64(double [[TMP]])
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %tmp = call double @llvm.exp2.f64(double %a)
+ %tmp1 = call double @llvm.exp2.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ ret double %mul
+}
+
+; exp2(a) * exp2(b) * exp2(c) * exp2(d) => exp2(a+b+c+d) with reassoc
+define double @exp2_a_exp2_b_exp2_c_exp2_d(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @exp2_a_exp2_b_exp2_c_exp2_d(
+; CHECK-NEXT: [[TMP:%.*]] = fadd reassoc double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc double [[TMP]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[D:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call reassoc double @llvm.exp2.f64(double [[TMP2]])
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %tmp = call double @llvm.exp2.f64(double %a)
+ %tmp1 = call double @llvm.exp2.f64(double %b)
+ %mul = fmul reassoc double %tmp, %tmp1
+ %tmp2 = call double @llvm.exp2.f64(double %c)
+ %mul1 = fmul reassoc double %mul, %tmp2
+ %tmp3 = call double @llvm.exp2.f64(double %d)
+ %mul2 = fmul reassoc double %mul1, %tmp3
+ ret double %mul2
+}
diff --git a/llvm/test/Transforms/InstCombine/fmul-pow.ll b/llvm/test/Transforms/InstCombine/fmul-pow.ll
new file mode 100644
index 00000000000..7a020627269
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-pow.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare double @llvm.pow.f64(double, double)
+
+define double @pow_ab_a(double %a, double %b) {
+; CHECK-LABEL: @pow_ab_a(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[A]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %mul = fmul double %1, %a
+ ret double %mul
+}
+
+define double @pow_ab_a_reassoc(double %a, double %b) {
+; CHECK-LABEL: @pow_ab_a_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP1]], [[A]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %mul = fmul reassoc double %1, %a
+ ret double %mul
+}
+
+define double @pow_ab_a_reassoc_commute(double %a, double %b) {
+; CHECK-LABEL: @pow_ab_a_reassoc_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[A]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = fdiv double 1.0, %a
+ %2 = call double @llvm.pow.f64(double %a, double %b)
+ %mul = fmul reassoc double %1, %2
+ ret double %mul
+}
+
+define double @pow_ab_pow_cb(double %a, double %b, double %c) {
+; CHECK-LABEL: @pow_ab_pow_cb(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.pow.f64(double [[C:%.*]], double [[B]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %2 = call double @llvm.pow.f64(double %c, double %b)
+ %mul = fmul double %2, %1
+ ret double %mul
+}
+
+define double @pow_ab_pow_cb_reassoc(double %a, double %b, double %c) {
+; CHECK-LABEL: @pow_ab_pow_cb_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.pow.f64(double [[C:%.*]], double [[B]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %2 = call double @llvm.pow.f64(double %c, double %b)
+ %mul = fmul reassoc double %2, %1
+ ret double %mul
+}
+
+define double @pow_ab_pow_ac(double %a, double %b, double %c) {
+; CHECK-LABEL: @pow_ab_pow_ac(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.pow.f64(double [[A]], double [[C:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %2 = call double @llvm.pow.f64(double %a, double %c)
+ %mul = fmul double %2, %1
+ ret double %mul
+}
+
+define double @pow_ab_x_pow_ac_reassoc(double %a, double %b, double %c) {
+; CHECK-LABEL: @pow_ab_x_pow_ac_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.pow.f64(double [[A]], double [[C:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.pow.f64(double %a, double %b)
+ %2 = call double @llvm.pow.f64(double %a, double %c)
+ %mul = fmul reassoc double %2, %1
+ ret double %mul
+}
diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
new file mode 100644
index 00000000000..6ab70e4d3cd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare double @llvm.sqrt.f64(double) nounwind readnone speculatable
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+declare void @use(double)
+
+; sqrt(a) * sqrt(b) no math flags
+
+define double @sqrt_a_sqrt_b(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.sqrt.f64(double %a)
+ %2 = call double @llvm.sqrt.f64(double %b)
+ %mul = fmul double %1, %2
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) fast-math, multiple uses
+
+define double @sqrt_a_sqrt_b_multiple_uses(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use(double [[TMP2]])
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call fast double @llvm.sqrt.f64(double %a)
+ %2 = call fast double @llvm.sqrt.f64(double %b)
+ %mul = fmul fast double %1, %2
+ call void @use(double %2)
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) => sqrt(a*b) with 'reassoc nnan'
+
+define double @sqrt_a_sqrt_b_reassoc_nnan(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc_nnan(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nnan double @llvm.sqrt.f64(double [[TMP1]])
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = call double @llvm.sqrt.f64(double %a)
+ %2 = call double @llvm.sqrt.f64(double %b)
+ %mul = fmul reassoc nnan double %1, %2
+ ret double %mul
+}
+
+; nnan disallows the possibility that both operands are negative,
+; so we won't return a number when the answer should be NaN.
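+; e.g., with a = b = -1.0: sqrt(-1.0) * sqrt(-1.0) is NaN, but sqrt(-1.0 * -1.0) == 1.0.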
+
+define double @sqrt_a_sqrt_b_reassoc(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.sqrt.f64(double %a)
+ %2 = call double @llvm.sqrt.f64(double %b)
+ %mul = fmul reassoc double %1, %2
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) * sqrt(c) * sqrt(d) => sqrt(a*b*c*d) with fast-math
+; 'reassoc nnan' on the fmuls is all that is required, but check propagation of other FMF.
+
+define double @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_sqrt_c_sqrt_d_reassoc(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan arcp double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nnan double [[TMP1]], [[C:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul reassoc nnan ninf double [[TMP2]], [[D:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = call reassoc nnan ninf double @llvm.sqrt.f64(double [[TMP3]])
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = call double @llvm.sqrt.f64(double %a)
+ %2 = call double @llvm.sqrt.f64(double %b)
+ %3 = call double @llvm.sqrt.f64(double %c)
+ %4 = call double @llvm.sqrt.f64(double %d)
+ %mul = fmul reassoc nnan arcp double %1, %2
+ %mul1 = fmul reassoc nnan double %mul, %3
+ %mul2 = fmul reassoc nnan ninf double %mul1, %4
+ ret double %mul2
+}
+
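+; (1.0 / sqrt(x)) * (1.0 / sqrt(x)) --> 1.0 / x with fast-math.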
+define double @rsqrt_squared(double %x) {
+; CHECK-LABEL: @rsqrt_squared(
+; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call fast double @llvm.sqrt.f64(double %x)
+ %rsqrt = fdiv fast double 1.0, %sqrt
+ %squared = fmul fast double %rsqrt, %rsqrt
+ ret double %squared
+}
+
+define double @sqrt_divisor_squared(double %x, double %y) {
+; CHECK-LABEL: @sqrt_divisor_squared(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan nsz double [[Y:%.*]], [[Y]]
+; CHECK-NEXT: [[SQUARED:%.*]] = fdiv reassoc nnan nsz double [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ %div = fdiv double %y, %sqrt
+ %squared = fmul reassoc nnan nsz double %div, %div
+ ret double %squared
+}
+
+define <2 x float> @sqrt_dividend_squared(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @sqrt_dividend_squared(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[Y:%.*]], [[Y]]
+; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast <2 x float> [[X:%.*]], [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[SQUARED]]
+;
+ %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
+ %div = fdiv fast <2 x float> %sqrt, %y
+ %squared = fmul fast <2 x float> %div, %div
+ ret <2 x float> %squared
+}
+
+; We do not transform this because it would result in an extra instruction.
+; This might still be a good optimization for the backend.
+
+define double @sqrt_divisor_squared_extra_use(double %x, double %y) {
+; CHECK-LABEL: @sqrt_divisor_squared_extra_use(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
+; CHECK-NEXT: call void @use(double [[DIV]])
+; CHECK-NEXT: [[SQUARED:%.*]] = fmul reassoc nnan nsz double [[DIV]], [[DIV]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ %div = fdiv double %y, %sqrt
+ call void @use(double %div)
+ %squared = fmul reassoc nnan nsz double %div, %div
+ ret double %squared
+}
+
+define double @sqrt_dividend_squared_extra_use(double %x, double %y) {
+; CHECK-LABEL: @sqrt_dividend_squared_extra_use(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: call void @use(double [[SQRT]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[Y]]
+; CHECK-NEXT: [[SQUARED:%.*]] = fdiv fast double [[X]], [[TMP1]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ call void @use(double %sqrt)
+ %div = fdiv fast double %sqrt, %y
+ %squared = fmul fast double %div, %div
+ ret double %squared
+}
+
+; Negative test - require 'nsz'.
+
+define double @sqrt_divisor_not_enough_FMF(double %x, double %y) {
+; CHECK-LABEL: @sqrt_divisor_not_enough_FMF(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[Y:%.*]], [[SQRT]]
+; CHECK-NEXT: [[SQUARED:%.*]] = fmul reassoc nnan double [[DIV]], [[DIV]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call double @llvm.sqrt.f64(double %x)
+ %div = fdiv double %y, %sqrt
+ %squared = fmul reassoc nnan double %div, %div
+ ret double %squared
+}
+
+; TODO: This is a special-case of the general pattern. If we have a constant
+; operand, the extra use limitation could be eased because this does not
+; result in an extra instruction (1.0 * 1.0 is constant folded).
+
+define double @rsqrt_squared_extra_use(double %x) {
+; CHECK-LABEL: @rsqrt_squared_extra_use(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[RSQRT:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: call void @use(double [[RSQRT]])
+; CHECK-NEXT: [[SQUARED:%.*]] = fmul fast double [[RSQRT]], [[RSQRT]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %sqrt = call fast double @llvm.sqrt.f64(double %x)
+ %rsqrt = fdiv fast double 1.0, %sqrt
+ call void @use(double %rsqrt)
+ %squared = fmul fast double %rsqrt, %rsqrt
+ ret double %squared
+}
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 00000000000..16d138539f9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,778 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @neg_constant(float %x) {
+; CHECK-LABEL: @neg_constant(
+; CHECK-NEXT: [[MUL:%.*]] = fmul ninf float [[X:%.*]], -2.000000e+01
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub = fsub float -0.0, %x
+ %mul = fmul ninf float %sub, 2.0e+1
+ ret float %mul
+}
+
+define <2 x float> @neg_constant_vec(<2 x float> %x) {
+; CHECK-LABEL: @neg_constant_vec(
+; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %mul = fmul ninf <2 x float> %sub, <float 2.0, float 3.0>
+ ret <2 x float> %mul
+}
+
+define <2 x float> @neg_constant_vec_undef(<2 x float> %x) {
+; CHECK-LABEL: @neg_constant_vec_undef(
+; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub = fsub <2 x float> <float undef, float -0.0>, %x
+ %mul = fmul ninf <2 x float> %sub, <float 2.0, float 3.0>
+ ret <2 x float> %mul
+}
+
+; (0.0 - X) * C => X * -C
+define float @neg_nsz_constant(float %x) {
+; CHECK-LABEL: @neg_nsz_constant(
+; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float [[X:%.*]], -2.000000e+01
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub = fsub nsz float 0.0, %x
+ %mul = fmul nnan float %sub, 2.0e+1
+ ret float %mul
+}
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @neg_neg(float %x, float %y) {
+; CHECK-LABEL: @neg_neg(
+; CHECK-NEXT: [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub1 = fsub float -0.0, %x
+ %sub2 = fsub float -0.0, %y
+ %mul = fmul arcp float %sub1, %sub2
+ ret float %mul
+}
+
+define <2 x float> @neg_neg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_neg_vec(
+; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub1 = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %sub2 = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %mul = fmul arcp <2 x float> %sub1, %sub2
+ ret <2 x float> %mul
+}
+
+define <2 x float> @neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_neg_vec_undef(
+; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub1 = fsub <2 x float> <float -0.0, float undef>, %x
+ %sub2 = fsub <2 x float> <float undef, float -0.0>, %y
+ %mul = fmul arcp <2 x float> %sub1, %sub2
+ ret <2 x float> %mul
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @neg_neg_nsz(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_nsz(
+; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub1 = fsub nsz float 0.0, %x
+ %sub2 = fsub nsz float 0.0, %y
+ %mul = fmul afn float %sub1, %sub2
+ ret float %mul
+}
+
+declare void @use_f32(float)
+
+define float @neg_neg_multi_use(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_multi_use(
+; CHECK-NEXT: [[NX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[NY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X]], [[Y]]
+; CHECK-NEXT: call void @use_f32(float [[NX]])
+; CHECK-NEXT: call void @use_f32(float [[NY]])
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %nx = fsub float -0.0, %x
+ %ny = fsub float -0.0, %y
+ %mul = fmul afn float %nx, %ny
+ call void @use_f32(float %nx)
+ call void @use_f32(float %ny)
+ ret float %mul
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @neg_sink(float %x, float %y) {
+; CHECK-LABEL: @neg_sink(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub = fsub float -0.0, %x
+ %mul = fmul float %sub, %y
+ ret float %mul
+}
+
+define <2 x float> @neg_sink_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_sink_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %mul = fmul <2 x float> %sub, %y
+ ret <2 x float> %mul
+}
+
+define <2 x float> @neg_sink_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_sink_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %sub = fsub <2 x float> <float undef, float -0.0>, %x
+ %mul = fmul <2 x float> %sub, %y
+ ret <2 x float> %mul
+}
+
+; (0.0 - X) * Y => 0.0 - (X * Y)
+define float @neg_sink_nsz(float %x, float %y) {
+; CHECK-LABEL: @neg_sink_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %sub1 = fsub nsz float 0.0, %x
+ %mul = fmul float %sub1, %y
+ ret float %mul
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @neg_sink_multi_use(float %x, float %y) {
+; CHECK-LABEL: @neg_sink_multi_use(
+; CHECK-NEXT: [[SUB1:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUB1]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul float [[MUL]], [[SUB1]]
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %sub1 = fsub float -0.0, %x
+ %mul = fmul float %sub1, %y
+ %mul2 = fmul float %mul, %sub1
+ ret float %mul2
+}
+
+; Don't crash when attempting to cast a constant FMul to an instruction.
+define void @test8(i32* %inout) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i32 2
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = load i32, i32* %inout, align 4
+ %conv = uitofp i32 %0 to float
+ %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3
+ %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit
+ %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %mul = fmul <4 x float> zeroinitializer, %1
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ]
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; X * -1.0 => -0.0 - X
+define float @test9(float %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[MUL:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %mul = fmul float %x, -1.0
+ ret float %mul
+}
+
+; PR18532
+define <4 x float> @test10(<4 x float> %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[MUL:%.*]] = fsub arcp afn <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: ret <4 x float> [[MUL]]
+;
+ %mul = fmul arcp afn <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+ ret <4 x float> %mul
+}
+
+define float @test11(float %x, float %y) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = fadd fast float [[B]], 3.000000e+00
+; CHECK-NEXT: ret float [[C]]
+;
+ %a = fadd fast float %x, 1.0
+ %b = fadd fast float %y, 2.0
+ %c = fadd fast float %a, %b
+ ret float %c
+}
+
+declare double @llvm.sqrt.f64(double)
+
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X,
+; but make sure another use of the sqrt is intact.
+; Note that the remaining fmul is altered but is not 'fast'
+; itself because it was not marked 'fast' originally.
+; Thus, we have an overall fast result, but no more indication of
+; 'fast'ness in the code.
+define double @sqrt_squared2(double %f) {
+; CHECK-LABEL: @sqrt_squared2(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]])
+; CHECK-NEXT: [[MUL2:%.*]] = fmul double [[SQRT]], [[F]]
+; CHECK-NEXT: ret double [[MUL2]]
+;
+ %sqrt = call double @llvm.sqrt.f64(double %f)
+ %mul1 = fmul fast double %sqrt, %sqrt
+ %mul2 = fmul double %mul1, %sqrt
+ ret double %mul2
+}
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+define float @fabs_squared(float %x) {
+; CHECK-LABEL: @fabs_squared(
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], [[X]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %mul = fmul float %x.fabs, %x.fabs
+ ret float %mul
+}
+
+define float @fabs_squared_fast(float %x) {
+; CHECK-LABEL: @fabs_squared_fast(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %mul = fmul fast float %x.fabs, %x.fabs
+ ret float %mul
+}
+
+define float @fabs_x_fabs(float %x, float %y) {
+; CHECK-LABEL: @fabs_x_fabs(
+; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %y.fabs = call float @llvm.fabs.f32(float %y)
+ %mul = fmul float %x.fabs, %y.fabs
+ ret float %mul
+}
+
+; (X*Y) * X => (X*X) * Y
+; The transform only requires 'reassoc', but test other FMF in
+; the commuted variants to make sure FMF propagates as expected.
+
+define float @reassoc_common_operand1(float %x, float %y) {
+; CHECK-LABEL: @reassoc_common_operand1(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %mul1 = fmul float %x, %y
+ %mul2 = fmul reassoc float %mul1, %x
+ ret float %mul2
+}
+
+; (Y*X) * X => (X*X) * Y
+
+define float @reassoc_common_operand2(float %x, float %y) {
+; CHECK-LABEL: @reassoc_common_operand2(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul fast float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %mul1 = fmul float %y, %x
+ %mul2 = fmul fast float %mul1, %x
+ ret float %mul2
+}
+
+; X * (X*Y) => (X*X) * Y
+
+define float @reassoc_common_operand3(float %x1, float %y) {
+; CHECK-LABEL: @reassoc_common_operand3(
+; CHECK-NEXT: [[X:%.*]] = fdiv float [[X1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan float [[X]], [[X]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc nnan float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %x = fdiv float %x1, 3.0 ; thwart complexity-based canonicalization
+ %mul1 = fmul float %x, %y
+ %mul2 = fmul reassoc nnan float %x, %mul1
+ ret float %mul2
+}
+
+; X * (Y*X) => (X*X) * Y
+
+define float @reassoc_common_operand4(float %x1, float %y) {
+; CHECK-LABEL: @reassoc_common_operand4(
+; CHECK-NEXT: [[X:%.*]] = fdiv float [[X1:%.*]], 3.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc ninf float [[X]], [[X]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc ninf float [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %x = fdiv float %x1, 3.0 ; thwart complexity-based canonicalization
+ %mul1 = fmul float %y, %x
+ %mul2 = fmul reassoc ninf float %x, %mul1
+ ret float %mul2
+}
+
+; No change if the first fmul has another use.
+
+define float @reassoc_common_operand_multi_use(float %x, float %y) {
+; CHECK-LABEL: @reassoc_common_operand_multi_use(
+; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = fmul fast float [[MUL1]], [[X]]
+; CHECK-NEXT: call void @use_f32(float [[MUL1]])
+; CHECK-NEXT: ret float [[MUL2]]
+;
+ %mul1 = fmul float %x, %y
+ %mul2 = fmul fast float %mul1, %x
+ call void @use_f32(float %mul1)
+ ret float %mul2
+}
+
+declare float @llvm.log2.f32(float)
+
+; log2(Y * 0.5) * X = log2(Y) * X - X
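+; (log2(Y * 0.5) = log2(Y) + log2(0.5) = log2(Y) - 1, so multiplying by X gives
+; log2(Y) * X - X. In the commuted test, the trailing fmul constant
+; 0x3FC24924A0000000 appears to be the single-precision value of 1/7 factored
+; out of the 'fdiv %x1, 7.0' operand.)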
+
+define float @log2half(float %x, float %y) {
+; CHECK-LABEL: @log2half(
+; CHECK-NEXT: [[LOG2:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[LOG2]], [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fsub fast float [[TMP1]], [[X]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %halfy = fmul float %y, 0.5
+ %log2 = call float @llvm.log2.f32(float %halfy)
+ %mul = fmul fast float %log2, %x
+ ret float %mul
+}
+
+define float @log2half_commute(float %x1, float %y) {
+; CHECK-LABEL: @log2half_commute(
+; CHECK-NEXT: [[LOG2:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[LOG2]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[TMP1]], [[X1]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP2]], 0x3FC24924A0000000
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %x = fdiv float %x1, 7.0 ; thwart complexity-based canonicalization
+ %halfy = fmul float %y, 0.5
+ %log2 = call float @llvm.log2.f32(float %halfy)
+ %mul = fmul fast float %x, %log2
+ ret float %mul
+}
+
+; C1/X * C2 => (C1*C2) / X
+
+define float @fdiv_constant_numerator_fmul(float %x) {
+; CHECK-LABEL: @fdiv_constant_numerator_fmul(
+; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv float 2.0e+3, %x
+ %t3 = fmul reassoc float %t1, 6.0e+3
+ ret float %t3
+}
+
+; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
+
+@fmul2_external = external global float
+
+define float @fdiv_constant_numerator_fmul_extra_use(float %x) {
+; CHECK-LABEL: @fdiv_constant_numerator_fmul_extra_use(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv fast float 1.000000e+00, [[X:%.*]]
+; CHECK-NEXT: store float [[DIV]], float* @fmul2_external, align 4
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[DIV]], 2.000000e+00
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %div = fdiv fast float 1.0, %x
+ store float %div, float* @fmul2_external
+ %mul = fmul fast float %div, 2.0
+ ret float %mul
+}
+
+; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal FP)
+
+define float @fdiv_constant_denominator_fmul(float %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul(
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv float %x, 2.0e+3
+ %t3 = fmul reassoc float %t1, 6.0e+3
+ ret float %t3
+}
+
+define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec(
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
+ %t3 = fmul reassoc <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
+ ret <4 x float> %t3
+}
+
+; Make sure fmul with constant expression doesn't assert.
+
+define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec_constexpr(
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
+ %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
+ %t3 = fmul reassoc <4 x float> %t1, %constExprMul
+ ret <4 x float> %t3
+}
+
+; This shows that at least part of instcombine does not check constant
+; values to see if it is creating denorms (0x3800000000000000 is a denorm
+; for 32-bit float), so protecting against denorms in other parts is
+; probably not doing the intended job.
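+; (FLT_MIN * 0.5 = 2^-126 * 2^-1 = 2^-127, which written as an IR double constant
+; is 0x3800000000000000 and is subnormal for a 32-bit float.)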
+
+define float @fmul_constant_reassociation(float %x) {
+; CHECK-LABEL: @fmul_constant_reassociation(
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz float [[X:%.*]], 0x3800000000000000
+; CHECK-NEXT: ret float [[R]]
+;
+ %mul_flt_min = fmul reassoc nsz float %x, 0x3810000000000000
+ %r = fmul reassoc nsz float %mul_flt_min, 0.5
+ ret float %r
+}
+
+; Canonicalization "X/C1 * C2 => X * (C2/C1)" still applies if C2/C1 is denormal
+; (otherwise, we should not have allowed the reassociation in the previous test).
+; 0x3810000000000000 == FLT_MIN
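+; (FLT_MIN / 2.0e+3 is far smaller than FLT_MIN, so the folded multiplier below
+; is itself subnormal for a 32-bit float, yet the fold is still performed.)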
+
+define float @fdiv_constant_denominator_fmul_denorm(float %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm(
+; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[X:%.*]], 0x3760620000000000
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv float %x, 2.0e+3
+ %t3 = fmul fast float %t1, 0x3810000000000000
+ ret float %t3
+}
+
+; X / C1 * C2 => X / (C1/C2) if C2/C1 is abnormal, but C1/C2 is a normal value.
+; TODO: We don't convert the fast fdiv to fmul because that would be multiplication
+; by a denormal, but we could do better when we know that denormals are not a problem.
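+; (Here C1 = 3.0 and C2 = FLT_MIN: C2/C1 is subnormal, but
+; C1/C2 = 3 * 2^126 = 1.5 * 2^127 = 0x47E8000000000000 is a normal float.)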
+
+define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder(
+; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fdiv float %x, 3.0
+ %t3 = fmul reassoc float %t1, 0x3810000000000000
+ ret float %t3
+}
+
+; Negative test: we should not have 2 divisions instead of the 1 we started with.
+
+define float @fdiv_constant_denominator_fmul_denorm_try_harder_extra_use(float %x) {
+; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fdiv float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T1]], 0x3810000000000000
+; CHECK-NEXT: [[R:%.*]] = fadd float [[T1]], [[T3]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %t1 = fdiv float %x, 3.0e+0
+ %t3 = fmul fast float %t1, 0x3810000000000000
+ %r = fadd float %t1, %t3
+ ret float %r
+}
+
+; (X + C1) * C2 --> (X * C2) + C1*C2
+
+define float @fmul_fadd_distribute(float %x) {
+; CHECK-LABEL: @fmul_fadd_distribute(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t2 = fadd float %x, 2.0
+ %t3 = fmul reassoc float %t2, 3.0
+ ret float %t3
+}
+
+; (X - C1) * C2 --> (X * C2) - C1*C2
+
+define float @fmul_fsub_distribute1(float %x) {
+; CHECK-LABEL: @fmul_fsub_distribute1(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t2 = fsub float %x, 2.0
+ %t3 = fmul reassoc float %t2, 3.0
+ ret float %t3
+}
+
+; (C1 - X) * C2 --> C1*C2 - (X * C2)
+
+define float @fmul_fsub_distribute2(float %x) {
+; CHECK-LABEL: @fmul_fsub_distribute2(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t2 = fsub float 2.0, %x
+ %t3 = fmul reassoc float %t2, 3.0
+ ret float %t3
+}
+
+; FIXME: This should only need 'reassoc'.
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3)
+
+define float @fmul_fadd_fmul_distribute(float %x) {
+; CHECK-LABEL: @fmul_fadd_fmul_distribute(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fadd float %t1, 2.0
+ %t3 = fmul fast float %t2, 5.0
+ ret float %t3
+}
+
+define float @fmul_fadd_distribute_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fadd_distribute_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd float [[T1]], 2.000000e+00
+; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00
+; CHECK-NEXT: call void @use_f32(float [[T2]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fadd float %t1, 2.0
+ %t3 = fmul fast float %t2, 5.0
+ call void @use_f32(float %t2)
+ ret float %t3
+}
+
+; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
+; 0x10000000000000 = DBL_MIN
+; TODO: We don't convert the fast fdiv to fmul because that would be multiplication
+; by a denormal, but we could do better when we know that denormals are not a problem.
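+; (With C1 = 3.0 and C3 = DBL_MIN: C1/C3 = 3 * 2^1022 = 1.5 * 2^1023 =
+; 0x7FE8000000000000, and C2*C3 = 5.0 * DBL_MIN = 1.25 * 2^-1020 =
+; 0x34000000000000; both are normal doubles.)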
+
+define double @fmul_fadd_fdiv_distribute2(double %x) {
+; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
+; CHECK-NEXT: ret double [[T3]]
+;
+ %t1 = fdiv double %x, 3.0
+ %t2 = fadd double %t1, 5.0
+ %t3 = fmul reassoc double %t2, 0x10000000000000
+ ret double %t3
+}
+
+; 5.0e-1 * DBL_MIN yields a denormal, so an expression like "(X*3.0 + 5.0e-1) * DBL_MIN"
+; could not be simplified into X * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN). In the test below
+; the folded constants stay normal, so the result matches the previous test.
+
+define double @fmul_fadd_fdiv_distribute3(double %x) {
+; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
+; CHECK-NEXT: ret double [[T3]]
+;
+ %t1 = fdiv double %x, 3.0
+ %t2 = fadd double %t1, 5.0
+ %t3 = fmul reassoc double %t2, 0x10000000000000
+ ret double %t3
+}
+
+; FIXME: This should only need 'reassoc'.
+; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3))
+
+define float @fmul_fsub_fmul_distribute(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT: [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]]
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fsub float 2.0, %t1
+ %t3 = fmul fast float %t2, 5.0
+ ret float %t3
+}
+
+define float @fmul_fsub_fmul_distribute_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fsub float 2.000000e+00, [[T1]]
+; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00
+; CHECK-NEXT: call void @use_f32(float [[T2]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fsub float 2.0, %t1
+ %t3 = fmul fast float %t2, 5.0
+ call void @use_f32(float %t2)
+ ret float %t3
+}
+
+; FIXME: This should only need 'reassoc'.
+; ((X*C1) - C2) * C3 => (X * (C1*C3)) - C2*C3
+
+define float @fmul_fsub_fmul_distribute2(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute2(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01
+; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fsub float %t1, 2.0
+ %t3 = fmul fast float %t2, 5.0
+ ret float %t3
+}
+
+define float @fmul_fsub_fmul_distribute2_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fsub_fmul_distribute2_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fsub float 2.000000e+00, [[T1]]
+; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00
+; CHECK-NEXT: call void @use_f32(float [[T2]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fmul float %x, 6.0
+ %t2 = fsub float 2.0, %t1
+ %t3 = fmul fast float %t2, 5.0
+ call void @use_f32(float %t2)
+ ret float %t3
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+
+define float @common_factor(float %x, float %y) {
+; CHECK-LABEL: @common_factor(
+; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[MUL]], [[X]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL1]], [[MUL]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %mul = fmul float %x, %y
+ %mul1 = fmul fast float %mul, %x
+ %add = fadd float %mul1, %mul
+ ret float %add
+}
+
+define double @fmul_fdiv_factor_squared(double %x, double %y) {
+; CHECK-LABEL: @fmul_fdiv_factor_squared(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SQUARED:%.*]] = fmul fast double [[DIV]], [[DIV]]
+; CHECK-NEXT: ret double [[SQUARED]]
+;
+ %div = fdiv fast double %x, %y
+ %squared = fmul fast double %div, %div
+ ret double %squared
+}
+
+define double @fmul_fdivs_factor_common_denominator(double %x, double %y, double %z) {
+; CHECK-LABEL: @fmul_fdivs_factor_common_denominator(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[Z:%.*]], [[Z]]
+; CHECK-NEXT: [[MUL:%.*]] = fdiv fast double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %div1 = fdiv double %x, %z
+ %div2 = fdiv double %y, %z
+ %mul = fmul fast double %div1, %div2
+ ret double %mul
+}
+
+define double @fmul_fdivs_factor(double %x, double %y, double %z, double %w) {
+; CHECK-LABEL: @fmul_fdivs_factor(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc double [[TMP1]], [[W:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %div1 = fdiv double %x, %y
+ %div2 = fdiv double %z, %w
+ %mul = fmul reassoc double %div1, %div2
+ ret double %mul
+}
+
+define double @fmul_fdiv_factor(double %x, double %y, double %z) {
+; CHECK-LABEL: @fmul_fdiv_factor(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %div = fdiv double %x, %y
+ %mul = fmul reassoc double %div, %z
+ ret double %mul
+}
+
+define double @fmul_fdiv_factor_constant1(double %x, double %y) {
+; CHECK-LABEL: @fmul_fdiv_factor_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %div = fdiv double %x, %y
+ %mul = fmul reassoc double %div, 42.0
+ ret double %mul
+}
+
+define <2 x float> @fmul_fdiv_factor_constant2(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fmul_fdiv_factor_constant2(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc <2 x float> [[TMP1]], <float 4.200000e+01, float 1.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[MUL]]
+;
+ %div = fdiv <2 x float> %x, <float 42.0, float 12.0>
+ %mul = fmul reassoc <2 x float> %div, %y
+ ret <2 x float> %mul
+}
+
+define float @fmul_fdiv_factor_extra_use(float %x, float %y) {
+; CHECK-LABEL: @fmul_fdiv_factor_extra_use(
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: call void @use_f32(float [[DIV]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc float [[DIV]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %div = fdiv float %x, 42.0
+ call void @use_f32(float %div)
+ %mul = fmul reassoc float %div, %y
+ ret float %mul
+}
diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll
new file mode 100644
index 00000000000..df1d5f570db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fneg.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(float)
+
+; -(X * C) --> X * (-C)
+
+define float @fmul_fneg(float %x) {
+; CHECK-LABEL: @fmul_fneg(
+; CHECK-NEXT: [[R:%.*]] = fmul float [[X:%.*]], -4.200000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %m = fmul float %x, 42.0
+ %r = fsub float -0.0, %m
+ ret float %r
+}
+
+; Fast math is not required, but it should be propagated.
+
+define float @fmul_fneg_fmf(float %x) {
+; CHECK-LABEL: @fmul_fneg_fmf(
+; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz float [[X:%.*]], -4.200000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %m = fmul float %x, 42.0
+ %r = fsub reassoc nsz float -0.0, %m
+ ret float %r
+}
+
+; Extra use prevents the fold. We don't want to replace the fneg with an fmul.
+
+define float @fmul_fneg_extra_use(float %x) {
+; CHECK-LABEL: @fmul_fneg_extra_use(
+; CHECK-NEXT: [[M:%.*]] = fmul float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[M]]
+; CHECK-NEXT: call void @use(float [[M]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %m = fmul float %x, 42.0
+ %r = fsub float -0.0, %m
+ call void @use(float %m)
+ ret float %r
+}
+
+; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything.
+
+define <4 x double> @fmul_fneg_vec(<4 x double> %x) {
+; CHECK-LABEL: @fmul_fneg_vec(
+; CHECK-NEXT: [[R:%.*]] = fmul <4 x double> [[X:%.*]], <double -4.200000e+01, double 0x7F80000000000000, double 0xFFF0000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %m = fmul <4 x double> %x, <double 42.0, double 0x7FF80000000000000, double 0x7FF0000000000000, double undef>
+ %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
+ ret <4 x double> %r
+}
+
+; -(X / C) --> X / (-C)
+
+define float @fdiv_op1_constant_fneg(float %x) {
+; CHECK-LABEL: @fdiv_op1_constant_fneg(
+; CHECK-NEXT: [[R:%.*]] = fdiv float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float %x, -42.0
+ %r = fsub float -0.0, %d
+ ret float %r
+}
+
+; Fast math is not required, but it should be propagated.
+
+define float @fdiv_op1_constant_fneg_fmf(float %x) {
+; CHECK-LABEL: @fdiv_op1_constant_fneg_fmf(
+; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float %x, -42.0
+ %r = fsub nnan float -0.0, %d
+ ret float %r
+}
+
+; Extra use prevents the fold. We don't want to replace the fneg with an fdiv.
+
+define float @fdiv_op1_constant_fneg_extra_use(float %x) {
+; CHECK-LABEL: @fdiv_op1_constant_fneg_extra_use(
+; CHECK-NEXT: [[D:%.*]] = fdiv float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[D]]
+; CHECK-NEXT: call void @use(float [[D]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float %x, 42.0
+ %r = fsub float -0.0, %d
+ call void @use(float %d)
+ ret float %r
+}
+
+; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything.
+
+define <4 x double> @fdiv_op1_constant_fneg_vec(<4 x double> %x) {
+; CHECK-LABEL: @fdiv_op1_constant_fneg_vec(
+; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> [[X:%.*]], <double 4.200000e+01, double 0x7FF800000ABCD000, double 0x7FF0000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %d = fdiv <4 x double> %x, <double -42.0, double 0xFFF800000ABCD000, double 0xFFF0000000000000, double undef>
+ %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %d
+ ret <4 x double> %r
+}
+
+; -(C / X) --> (-C) / X
+
+define float @fdiv_op0_constant_fneg(float %x) {
+; CHECK-LABEL: @fdiv_op0_constant_fneg(
+; CHECK-NEXT: [[R:%.*]] = fdiv float -4.200000e+01, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float 42.0, %x
+ %r = fsub float -0.0, %d
+ ret float %r
+}
+
+; Fast math is not required, but it should be propagated.
+
+define float @fdiv_op0_constant_fneg_fmf(float %x) {
+; CHECK-LABEL: @fdiv_op0_constant_fneg_fmf(
+; CHECK-NEXT: [[R:%.*]] = fdiv fast float -4.200000e+01, [[X:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float 42.0, %x
+ %r = fsub fast float -0.0, %d
+ ret float %r
+}
+
+; Extra use prevents the fold. We don't want to replace the fneg with an fdiv.
+
+define float @fdiv_op0_constant_fneg_extra_use(float %x) {
+; CHECK-LABEL: @fdiv_op0_constant_fneg_extra_use(
+; CHECK-NEXT: [[D:%.*]] = fdiv float -4.200000e+01, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[D]]
+; CHECK-NEXT: call void @use(float [[D]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %d = fdiv float -42.0, %x
+ %r = fsub float -0.0, %d
+ call void @use(float %d)
+ ret float %r
+}
+
+; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything.
+
+define <4 x double> @fdiv_op0_constant_fneg_vec(<4 x double> %x) {
+; CHECK-LABEL: @fdiv_op0_constant_fneg_vec(
+; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> <double 4.200000e+01, double 0x7F80000000000000, double 0x7FF0000000000000, double 0x7FF8000000000000>, [[X:%.*]]
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %d = fdiv <4 x double> <double -42.0, double 0x7FF80000000000000, double 0xFFF0000000000000, double undef>, %x
+ %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %d
+ ret <4 x double> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
new file mode 100644
index 00000000000..d3303262be3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
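+; The icmp of two distinct inttoptr constants should constant-fold to false, so
+; @f reduces to 'and i1 %x, false' and @g adds a zext of false (i.e. 0), giving
+; the results the CHECK lines expect.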
+define i1 @f(i1 %x) {
+; CHECK-LABEL: @f(
+; CHECK: ret i1 false
+ %b = and i1 %x, icmp eq (i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 2 to i8*))
+ ret i1 %b
+}
+
+define i32 @g(i32 %x) {
+; CHECK-LABEL: @g(
+; CHECK: ret i32 %x
+ %b = add i32 %x, zext (i1 icmp eq (i8* inttoptr (i32 1000000 to i8*), i8* inttoptr (i32 2000000 to i8*)) to i32)
+ ret i32 %b
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fold-calls.ll b/llvm/test/Transforms/InstCombine/fold-calls.ll
new file mode 100644
index 00000000000..1a9a9fd2e9e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-calls.ll
@@ -0,0 +1,19 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This shouldn't fold, because sin(inf) is invalid.
+; CHECK-LABEL: @foo(
+; CHECK: %t = call double @sin(double 0x7FF0000000000000)
+define double @foo() {
+ %t = call double @sin(double 0x7FF0000000000000)
+ ret double %t
+}
+
+; This should fold.
+; CHECK-LABEL: @bar(
+; CHECK: ret double 0.0
+define double @bar() {
+ %t = call double @sin(double 0.0)
+ ret double %t
+}
+
+declare double @sin(double)
diff --git a/llvm/test/Transforms/InstCombine/fold-fops-into-selects.ll b/llvm/test/Transforms/InstCombine/fold-fops-into-selects.ll
new file mode 100644
index 00000000000..07aebb13eff
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-fops-into-selects.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
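+
+; These tests appear to exercise folding an FP binop into a select with a
+; constant arm: op(select(A, C1, B), C2) -> select(A, fold(C1, C2), op(B, C2)),
+; with the constant arm evaluated at compile time.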
+
+define float @test1(i1 %A) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float 0.000000e+00
+ %op = fsub float 1.000000e+00, %cf
+ ret float %op
+; CHECK-LABEL: @test1(
+; CHECK: select i1 %A, float 0.000000e+00, float 1.000000e+00
+}
+
+define float @test2(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fadd float 2.000000e+00, %cf
+ ret float %op
+; CHECK-LABEL: @test2(
+; CHECK: [[OP:%.*]] = fadd float %B, 2.000000e+00
+; CHECK: select i1 %A, float 3.000000e+00, float [[OP]]
+}
+
+define float @test3(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fsub float 2.000000e+00, %cf
+ ret float %op
+; CHECK-LABEL: @test3(
+; CHECK: [[OP:%.*]] = fsub float 2.000000e+00, %B
+; CHECK: select i1 %A, float 1.000000e+00, float [[OP]]
+}
+
+define float @test4(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fmul float 2.000000e+00, %cf
+ ret float %op
+; CHECK-LABEL: @test4(
+; CHECK: [[OP:%.*]] = fmul float %B, 2.000000e+00
+; CHECK: select i1 %A, float 2.000000e+00, float [[OP]]
+}
+
+define float @test5(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fdiv float 2.000000e+00, %cf
+ ret float %op
+; CHECK-LABEL: @test5(
+; CHECK: [[OP:%.*]] = fdiv float 2.000000e+00, %B
+; CHECK: select i1 %A, float 2.000000e+00, float [[OP]]
+}
+
+define float @test6(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fdiv float %cf, 2.000000e+00
+ ret float %op
+; CHECK-LABEL: @test6(
+; CHECK: [[OP:%.*]] = fmul float %B, 5.000000e-01
+; CHECK: select i1 %A, float 5.000000e-01, float [[OP]]
+}
+
+define float @test7(i1 %A, float %B) {
+EntryBlock:
+ %cf = select i1 %A, float 1.000000e+00, float %B
+ %op = fdiv float %cf, 3.000000e+00
+ ret float %op
+; CHECK-LABEL: @test7(
+; CHECK: [[OP:%.*]] = fdiv float %B, 3.000000e+00
+; CHECK: select i1 %A, float 0x3FD5555560000000, float [[OP]]
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
new file mode 100644
index 00000000000..e5a1aa7362a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@g1 = common global i32* null, align 8
+
+%struct.S1 = type { i32, float }
+%struct.S2 = type { float, i32 }
+
+; Check that instcombine preserves metadata when it merges two loads.
+;
+; CHECK: return:
+; CHECK: load i32*, i32** %{{[a-z0-9.]+}}, align 8, !nonnull ![[EMPTYNODE:[0-9]+]]
+; CHECK: load i32, i32* %{{[a-z0-9.]+}}, align 4, !tbaa ![[TBAA:[0-9]+]], !range ![[RANGE:[0-9]+]], !invariant.load ![[EMPTYNODE:[0-9]+]], !alias.scope ![[ALIAS_SCOPE:[0-9]+]], !noalias ![[NOALIAS:[0-9]+]]
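+;
+; In particular, the merged load is expected to carry the union of the two input
+; ranges (!{i32 10, i32 20} and !{i32 15, i32 25} become !{i32 10, i32 25}), an
+; !alias.scope list covering scope0/scope1/scope2, and a !noalias list reduced to
+; the common scope3.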
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @phi_load_metadata(%struct.S1* %s1, %struct.S2* %s2, i32 %c, i32** %x0, i32 **%x1) #0 {
+entry:
+ %tobool = icmp eq i32 %c, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %i = getelementptr inbounds %struct.S2, %struct.S2* %s2, i64 0, i32 1
+ %val = load i32, i32* %i, align 4, !tbaa !0, !alias.scope !13, !noalias !14, !invariant.load !17, !range !18
+ %p0 = load i32*, i32** %x0, align 8, !nonnull !17
+ br label %return
+
+if.end: ; preds = %entry
+ %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i64 0, i32 0
+ %val2 = load i32, i32* %i2, align 4, !tbaa !2, !alias.scope !15, !noalias !16, !invariant.load !17, !range !19
+ %p1 = load i32*, i32** %x1, align 8, !nonnull !17
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval = phi i32 [ %val, %if.then ], [ %val2, %if.end ]
+ %pval = phi i32* [ %p0, %if.then ], [ %p1, %if.end ]
+ store i32* %pval, i32** @g1, align 8
+ ret i32 %retval
+}
+
+; CHECK: ![[EMPTYNODE]] = !{}
+; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
+; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
+; CHECK: ![[RANGE]] = !{i32 10, i32 25}
+; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]}
+; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
+; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
+; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
+; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
+; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
+
+!0 = !{!1, !4, i64 4}
+!1 = !{!"", !7, i64 0, !4, i64 4}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"float", !5, i64 0}
+!8 = !{!8, !"some domain"}
+!9 = !{!9, !8, !"scope0"}
+!10 = !{!10, !8, !"scope1"}
+!11 = !{!11, !8, !"scope2"}
+!12 = !{!12, !8, !"scope3"}
+!13 = !{!9, !10}
+!14 = !{!11, !12}
+!15 = !{!9, !11}
+!16 = !{!10, !12}
+!17 = !{}
+!18 = !{i32 10, i32 20}
+!19 = !{i32 15, i32 25}
diff --git a/llvm/test/Transforms/InstCombine/fold-phi.ll b/llvm/test/Transforms/InstCombine/fold-phi.ll
new file mode 100644
index 00000000000..c6bb1b36335
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: no_crash
+define float @no_crash(float %a) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+ %add5 = fadd float %sum.057, %a ; PR14592
+ br i1 undef, label %bb0, label %end
+
+bb0:
+ br label %for.body
+
+end:
+ ret float %add5
+}
+
+; CHECK-LABEL: @pr21377(
+define void @pr21377(i32) {
+entry:
+ br label %while.body
+
+while.body: ; preds = %if.end, %entry
+ %phi1 = phi i64 [ undef, %entry ], [ %or2, %if.end ]
+ %zext = zext i32 %0 to i64
+ br i1 undef, label %if.end, label %if.else
+
+if.else: ; preds = %while.body
+ %or1 = or i64 %phi1, %zext
+ %and = and i64 %or1, 4294967295
+ br label %if.end
+
+if.end: ; preds = %if.else, %while.body
+ %phi2 = phi i64 [ %and, %if.else ], [ undef, %while.body ]
+ %or2 = or i64 %phi2, %zext
+ br label %while.body
+}
diff --git a/llvm/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll b/llvm/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
new file mode 100644
index 00000000000..bd92b4a29c0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
@@ -0,0 +1,17 @@
+; RUN: opt -instcombine -S -disable-simplify-libcalls < %s | FileCheck %s
+; rdar://10466410
+
+; Instcombine tries to fold (fptrunc (sqrt (fpext x))) -> (sqrtf x), but this
+; shouldn't fold when sqrtf isn't available.
+define float @foo(float %f) uwtable ssp {
+entry:
+; CHECK: %conv = fpext float %f to double
+; CHECK: %call = tail call double @sqrt(double %conv)
+; CHECK: %conv1 = fptrunc double %call to float
+ %conv = fpext float %f to double
+ %call = tail call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+declare double @sqrt(double)
diff --git a/llvm/test/Transforms/InstCombine/fold-vector-select.ll b/llvm/test/Transforms/InstCombine/fold-vector-select.ll
new file mode 100644
index 00000000000..b58d9dc90ac
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-vector-select.ll
@@ -0,0 +1,150 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK-NOT: select
+
+define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D,
+ <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H,
+ <4 x i32> *%I, <4 x i32> *%J, <4 x i32> *%K, <4 x i32> *%L,
+ <4 x i32> *%M, <4 x i32> *%N, <4 x i32> *%O, <4 x i32> *%P,
+ <4 x i32> *%Q, <4 x i32> *%R, <4 x i32> *%S, <4 x i32> *%T,
+ <4 x i32> *%U, <4 x i32> *%V, <4 x i32> *%W, <4 x i32> *%X,
+ <4 x i32> *%Y, <4 x i32> *%Z, <4 x i32> *%BA, <4 x i32> *%BB,
+ <4 x i32> *%BC, <4 x i32> *%BD, <4 x i32> *%BE, <4 x i32> *%BF,
+ <4 x i32> *%BG, <4 x i32> *%BH, <4 x i32> *%BI, <4 x i32> *%BJ,
+ <4 x i32> *%BK, <4 x i32> *%BL, <4 x i32> *%BM, <4 x i32> *%BN,
+ <4 x i32> *%BO, <4 x i32> *%BP, <4 x i32> *%BQ, <4 x i32> *%BR,
+ <4 x i32> *%BS, <4 x i32> *%BT, <4 x i32> *%BU, <4 x i32> *%BV,
+ <4 x i32> *%BW, <4 x i32> *%BX, <4 x i32> *%BY, <4 x i32> *%BZ,
+ <4 x i32> *%CA, <4 x i32> *%CB, <4 x i32> *%CC, <4 x i32> *%CD,
+ <4 x i32> *%CE, <4 x i32> *%CF, <4 x i32> *%CG, <4 x i32> *%CH,
+ <4 x i32> *%CI, <4 x i32> *%CJ, <4 x i32> *%CK, <4 x i32> *%CL) {
+ %a = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 9, i32 87, i32 57, i32 8>
+ %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 99, i32 49, i32 29>
+ %c = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 18, i32 53, i32 84>
+ %d = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 29, i32 82, i32 45, i32 16>
+ %e = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 11, i32 15, i32 32, i32 99>
+ %f = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 19, i32 86, i32 29, i32 33>
+ %g = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 10, i32 26, i32 45>
+ %h = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 88, i32 70, i32 90, i32 48>
+ %i = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 30, i32 53, i32 42, i32 12>
+ %j = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 46, i32 24, i32 93, i32 26>
+ %k = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 33, i32 99, i32 15, i32 57>
+ %l = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 51, i32 60, i32 60, i32 50>
+ %m = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 50, i32 12, i32 7, i32 45>
+ %n = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 65, i32 36, i32 36>
+ %o = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 54, i32 0, i32 17, i32 78>
+ %p = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 56, i32 13, i32 64, i32 48>
+ %q = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 52, i32 69, i32 88, i32 11>, <4 x i32> zeroinitializer
+ %r = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 5, i32 87, i32 68, i32 14>, <4 x i32> zeroinitializer
+ %s = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 47, i32 17, i32 66, i32 63>, <4 x i32> zeroinitializer
+ %t = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 64, i32 25, i32 73, i32 81>, <4 x i32> zeroinitializer
+ %u = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 51, i32 41, i32 61, i32 63>, <4 x i32> zeroinitializer
+ %v = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 39, i32 59, i32 17, i32 0>, <4 x i32> zeroinitializer
+ %w = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 91, i32 99, i32 97, i32 29>, <4 x i32> zeroinitializer
+ %x = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 89, i32 45, i32 89, i32 10>, <4 x i32> zeroinitializer
+ %y = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 25, i32 70, i32 21, i32 27>, <4 x i32> zeroinitializer
+ %z = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 40, i32 12, i32 27, i32 88>, <4 x i32> zeroinitializer
+ %ba = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 36, i32 35, i32 90, i32 23>, <4 x i32> zeroinitializer
+ %bb = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 83, i32 3, i32 64, i32 82>, <4 x i32> zeroinitializer
+ %bc = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 15, i32 72, i32 2, i32 54>, <4 x i32> zeroinitializer
+ %bd = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 32, i32 47, i32 100, i32 84>, <4 x i32> zeroinitializer
+ %be = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 92, i32 57, i32 82, i32 1>, <4 x i32> zeroinitializer
+ %bf = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 42, i32 14, i32 22, i32 89>, <4 x i32> zeroinitializer
+ %bg = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 33, i32 10, i32 67, i32 66>, <4 x i32> <i32 42, i32 91, i32 47, i32 40>
+ %bh = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 8, i32 13, i32 48, i32 0>, <4 x i32> <i32 84, i32 66, i32 87, i32 84>
+ %bi = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 85, i32 96, i32 1, i32 94>, <4 x i32> <i32 54, i32 57, i32 7, i32 92>
+ %bj = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 55, i32 21, i32 92, i32 68>, <4 x i32> <i32 51, i32 61, i32 62, i32 39>
+ %bk = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 42, i32 18, i32 77, i32 74>, <4 x i32> <i32 82, i32 33, i32 30, i32 7>
+ %bl = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 80, i32 92, i32 61, i32 84>, <4 x i32> <i32 43, i32 89, i32 92, i32 6>
+ %bm = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 49, i32 14, i32 62, i32 62>, <4 x i32> <i32 35, i32 33, i32 92, i32 59>
+ %bn = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 3, i32 97, i32 49, i32 18>, <4 x i32> <i32 56, i32 64, i32 19, i32 75>
+ %bo = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 91, i32 57, i32 0, i32 1>, <4 x i32> <i32 43, i32 63, i32 64, i32 11>
+ %bp = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 41, i32 65, i32 18, i32 11>, <4 x i32> <i32 86, i32 26, i32 31, i32 3>
+ %bq = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 31, i32 46, i32 32, i32 68>, <4 x i32> <i32 100, i32 59, i32 62, i32 6>
+ %br = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 76, i32 67, i32 87, i32 7>, <4 x i32> <i32 63, i32 48, i32 97, i32 24>
+ %bs = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 83, i32 89, i32 19, i32 4>, <4 x i32> <i32 21, i32 2, i32 40, i32 21>
+ %bt = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 45, i32 76, i32 81, i32 100>, <4 x i32> <i32 65, i32 26, i32 100, i32 46>
+ %bu = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 16, i32 75, i32 31, i32 17>, <4 x i32> <i32 37, i32 66, i32 86, i32 65>
+ %bv = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 13, i32 25, i32 43, i32 59>, <4 x i32> <i32 82, i32 78, i32 60, i32 52>
+ %bw = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %bx = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %by = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %bz = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ca = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cb = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cc = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cd = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ce = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cf = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cg = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ch = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ci = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cj = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ck = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cl = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ store <4 x i32> %a, <4 x i32>* %A
+ store <4 x i32> %b, <4 x i32>* %B
+ store <4 x i32> %c, <4 x i32>* %C
+ store <4 x i32> %d, <4 x i32>* %D
+ store <4 x i32> %e, <4 x i32>* %E
+ store <4 x i32> %f, <4 x i32>* %F
+ store <4 x i32> %g, <4 x i32>* %G
+ store <4 x i32> %h, <4 x i32>* %H
+ store <4 x i32> %i, <4 x i32>* %I
+ store <4 x i32> %j, <4 x i32>* %J
+ store <4 x i32> %k, <4 x i32>* %K
+ store <4 x i32> %l, <4 x i32>* %L
+ store <4 x i32> %m, <4 x i32>* %M
+ store <4 x i32> %n, <4 x i32>* %N
+ store <4 x i32> %o, <4 x i32>* %O
+ store <4 x i32> %p, <4 x i32>* %P
+ store <4 x i32> %q, <4 x i32>* %Q
+ store <4 x i32> %r, <4 x i32>* %R
+ store <4 x i32> %s, <4 x i32>* %S
+ store <4 x i32> %t, <4 x i32>* %T
+ store <4 x i32> %u, <4 x i32>* %U
+ store <4 x i32> %v, <4 x i32>* %V
+ store <4 x i32> %w, <4 x i32>* %W
+ store <4 x i32> %x, <4 x i32>* %X
+ store <4 x i32> %y, <4 x i32>* %Y
+ store <4 x i32> %z, <4 x i32>* %Z
+ store <4 x i32> %ba, <4 x i32>* %BA
+ store <4 x i32> %bb, <4 x i32>* %BB
+ store <4 x i32> %bc, <4 x i32>* %BC
+ store <4 x i32> %bd, <4 x i32>* %BD
+ store <4 x i32> %be, <4 x i32>* %BE
+ store <4 x i32> %bf, <4 x i32>* %BF
+ store <4 x i32> %bg, <4 x i32>* %BG
+ store <4 x i32> %bh, <4 x i32>* %BH
+ store <4 x i32> %bi, <4 x i32>* %BI
+ store <4 x i32> %bj, <4 x i32>* %BJ
+ store <4 x i32> %bk, <4 x i32>* %BK
+ store <4 x i32> %bl, <4 x i32>* %BL
+ store <4 x i32> %bm, <4 x i32>* %BM
+ store <4 x i32> %bn, <4 x i32>* %BN
+ store <4 x i32> %bo, <4 x i32>* %BO
+ store <4 x i32> %bp, <4 x i32>* %BP
+ store <4 x i32> %bq, <4 x i32>* %BQ
+ store <4 x i32> %br, <4 x i32>* %BR
+ store <4 x i32> %bs, <4 x i32>* %BS
+ store <4 x i32> %bt, <4 x i32>* %BT
+ store <4 x i32> %bu, <4 x i32>* %BU
+ store <4 x i32> %bv, <4 x i32>* %BV
+ store <4 x i32> %bw, <4 x i32>* %BW
+ store <4 x i32> %bx, <4 x i32>* %BX
+ store <4 x i32> %by, <4 x i32>* %BY
+ store <4 x i32> %bz, <4 x i32>* %BZ
+ store <4 x i32> %ca, <4 x i32>* %CA
+ store <4 x i32> %cb, <4 x i32>* %CB
+ store <4 x i32> %cc, <4 x i32>* %CC
+ store <4 x i32> %cd, <4 x i32>* %CD
+ store <4 x i32> %ce, <4 x i32>* %CE
+ store <4 x i32> %cf, <4 x i32>* %CF
+ store <4 x i32> %cg, <4 x i32>* %CG
+ store <4 x i32> %ch, <4 x i32>* %CH
+ store <4 x i32> %ci, <4 x i32>* %CI
+ store <4 x i32> %cj, <4 x i32>* %CJ
+ store <4 x i32> %ck, <4 x i32>* %CK
+ store <4 x i32> %cl, <4 x i32>* %CL
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/fold-vector-zero.ll b/llvm/test/Transforms/InstCombine/fold-vector-zero.ll
new file mode 100644
index 00000000000..bf661df7795
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-vector-zero.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instcombine -S | not grep zeroinitializer
+
+define void @foo(i64 %A, i64 %B) {
+bb8:
+ br label %bb30
+
+bb30:
+ %s0 = phi i64 [ 0, %bb8 ], [ %r21, %bb30 ]
+ %l0 = phi i64 [ -2222, %bb8 ], [ %r23, %bb30 ]
+ %r2 = add i64 %s0, %B
+ %r3 = inttoptr i64 %r2 to <2 x double>*
+ %r4 = load <2 x double>, <2 x double>* %r3, align 8
+ %r6 = bitcast <2 x double> %r4 to <2 x i64>
+ %r7 = bitcast <2 x double> zeroinitializer to <2 x i64>
+ %r8 = insertelement <2 x i64> undef, i64 9223372036854775807, i32 0
+ %r9 = insertelement <2 x i64> undef, i64 -9223372036854775808, i32 0
+ %r10 = insertelement <2 x i64> %r8, i64 9223372036854775807, i32 1
+ %r11 = insertelement <2 x i64> %r9, i64 -9223372036854775808, i32 1
+ %r12 = and <2 x i64> %r6, %r10
+ %r13 = and <2 x i64> %r7, %r11
+ %r14 = or <2 x i64> %r12, %r13
+ %r15 = bitcast <2 x i64> %r14 to <2 x double>
+ %r18 = add i64 %s0, %A
+ %r19 = inttoptr i64 %r18 to <2 x double>*
+ store <2 x double> %r15, <2 x double>* %r19, align 8
+ %r21 = add i64 16, %s0
+ %r23 = add i64 1, %l0
+ %r25 = icmp slt i64 %r23, 0
+ %r26 = zext i1 %r25 to i64
+ %r27 = icmp ne i64 %r26, 0
+ br i1 %r27, label %bb30, label %bb5
+
+bb5:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll b/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll
new file mode 100644
index 00000000000..7106933d2bd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep "call float bitcast" | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+ %struct.NSObject = type { %struct.objc_class* }
+ %struct.NSArray = type { %struct.NSObject }
+ %struct.objc_class = type opaque
+ %struct.objc_selector = type opaque
+
+@"\01L_OBJC_METH_VAR_NAME_112" = internal global [15 x i8] c"whiteComponent\00", section "__TEXT,__cstring,cstring_literals"
+@"\01L_OBJC_SELECTOR_REFERENCES_81" = internal global %struct.objc_selector* bitcast ([15 x i8]* @"\01L_OBJC_METH_VAR_NAME_112" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip"
+
+define void @bork() nounwind {
+entry:
+ %color = alloca %struct.NSArray*
+ %color.466 = alloca %struct.NSObject*
+ %tmp103 = load %struct.NSArray*, %struct.NSArray** %color, align 4
+ %tmp103104 = getelementptr %struct.NSArray, %struct.NSArray* %tmp103, i32 0, i32 0
+ store %struct.NSObject* %tmp103104, %struct.NSObject** %color.466, align 4
+ %tmp105 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_81", align 4
+ %tmp106 = load %struct.NSObject*, %struct.NSObject** %color.466, align 4
+ %tmp107 = call float bitcast (void (%struct.NSObject*, ...)* @objc_msgSend_fpret to float (%struct.NSObject*, %struct.objc_selector*)*)( %struct.NSObject* %tmp106, %struct.objc_selector* %tmp105 ) nounwind
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare void @objc_msgSend_fpret(%struct.NSObject*, ...)
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
new file mode 100644
index 00000000000..bfc1de4ff6d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test some floating point casting cases
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i8 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i8 -1
+;
+ %x = fptoui float 2.550000e+02 to i8
+ ret i8 %x
+}
+
+define i8 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i8 -1
+;
+ %x = fptosi float -1.000000e+00 to i8
+ ret i8 %x
+}
+
+define half @test3(float %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: [[C:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
+; CHECK-NEXT: ret half [[C]]
+;
+ %b = call float @llvm.fabs.f32(float %a)
+ %c = fptrunc float %b to half
+ ret half %c
+}
+
+define half @fneg_fptrunc(float %a) {
+; CHECK-LABEL: @fneg_fptrunc(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: [[C:%.*]] = fsub half 0xH8000, [[TMP1]]
+; CHECK-NEXT: ret half [[C]]
+;
+ %b = fsub float -0.0, %a
+ %c = fptrunc float %b to half
+ ret half %c
+}
+
+define <2 x half> @fneg_fptrunc_vec_undef(<2 x float> %a) {
+; CHECK-LABEL: @fneg_fptrunc_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT: [[C:%.*]] = fsub <2 x half> <half 0xH8000, half 0xH8000>, [[TMP1]]
+; CHECK-NEXT: ret <2 x half> [[C]]
+;
+ %b = fsub <2 x float> <float -0.0, float undef>, %a
+ %c = fptrunc <2 x float> %b to <2 x half>
+ ret <2 x half> %c
+}
+
+define half @test4-fast(float %a) {
+; CHECK-LABEL: @test4-fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: [[C:%.*]] = fsub fast half 0xH8000, [[TMP1]]
+; CHECK-NEXT: ret half [[C]]
+;
+ %b = fsub fast float -0.0, %a
+ %c = fptrunc float %b to half
+ ret half %c
+}
+
+define half @test5(float %a, float %b, float %c) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[D:%.*]] = fcmp ogt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[E:%.*]] = select i1 [[D]], float [[C:%.*]], float 1.000000e+00
+; CHECK-NEXT: [[F:%.*]] = fptrunc float [[E]] to half
+; CHECK-NEXT: ret half [[F]]
+;
+ %d = fcmp ogt float %a, %b
+ %e = select i1 %d, float %c, float 1.0
+ %f = fptrunc float %e to half
+ ret half %f
+}
+
+declare float @llvm.fabs.f32(float) nounwind readonly
+
+define <1 x float> @test6(<1 x double> %V) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[FREM:%.*]] = frem <1 x double> [[V:%.*]], [[V]]
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <1 x double> [[FREM]] to <1 x float>
+; CHECK-NEXT: ret <1 x float> [[TRUNC]]
+;
+ %frem = frem <1 x double> %V, %V
+ %trunc = fptrunc <1 x double> %frem to <1 x float>
+ ret <1 x float> %trunc
+}
+
+define float @test7(double %V) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[FREM:%.*]] = frem double [[V:%.*]], 1.000000e+00
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc double [[FREM]] to float
+; CHECK-NEXT: ret float [[TRUNC]]
+;
+ %frem = frem double %V, 1.000000e+00
+ %trunc = fptrunc double %frem to float
+ ret float %trunc
+}
+
+define float @test8(float %V) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[FEXT:%.*]] = fpext float [[V:%.*]] to double
+; CHECK-NEXT: [[FREM:%.*]] = frem double [[FEXT]], 1.000000e-01
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc double [[FREM]] to float
+; CHECK-NEXT: ret float [[TRUNC]]
+;
+ %fext = fpext float %V to double
+ %frem = frem double %fext, 1.000000e-01
+ %trunc = fptrunc double %frem to float
+ ret float %trunc
+}
+
+define half @test_fptrunc_fptrunc(double %V) {
+; CHECK-LABEL: @test_fptrunc_fptrunc(
+; CHECK-NEXT: [[T1:%.*]] = fptrunc double [[V:%.*]] to float
+; CHECK-NEXT: [[T2:%.*]] = fptrunc float [[T1]] to half
+; CHECK-NEXT: ret half [[T2]]
+;
+ %t1 = fptrunc double %V to float
+ %t2 = fptrunc float %t1 to half
+ ret half %t2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fpextend.ll b/llvm/test/Transforms/InstCombine/fpextend.ll
new file mode 100644
index 00000000000..88401504f57
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fpextend.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @test(float %x) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP34:%.*]] = fadd float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[TMP34]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp3 = fadd double %tmp1, 0.000000e+00
+ %tmp34 = fptrunc double %tmp3 to float
+ ret float %tmp34
+}
+
+define float @test2(float %x, float %y) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP56:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp23 = fpext float %y to double
+ %tmp5 = fmul double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test3(float %x, float %y) nounwind {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP56:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp23 = fpext float %y to double
+ %tmp5 = fdiv double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test4(float %x) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP34:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret float [[TMP34]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp2 = fsub double -0.000000e+00, %tmp1
+ %tmp34 = fptrunc double %tmp2 to float
+ ret float %tmp34
+}
+
+; Test with vector splat constant
+define <2 x float> @test5(<2 x float> %x) nounwind {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x float> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[TMP34]]
+;
+entry:
+ %tmp1 = fpext <2 x float> %x to <2 x double>
+ %tmp3 = fadd <2 x double> %tmp1, <double 0.000000e+00, double 0.000000e+00>
+ %tmp34 = fptrunc <2 x double> %tmp3 to <2 x float>
+ ret <2 x float> %tmp34
+}
+
+; Test with a non-splat constant
+define <2 x float> @test6(<2 x float> %x) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x float> [[X:%.*]], <float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <2 x float> [[TMP34]]
+;
+entry:
+ %tmp1 = fpext <2 x float> %x to <2 x double>
+ %tmp3 = fadd <2 x double> %tmp1, <double 0.000000e+00, double -0.000000e+00>
+ %tmp34 = fptrunc <2 x double> %tmp3 to <2 x float>
+ ret <2 x float> %tmp34
+}
+
+; Test with an undef element
+; TODO: Support undef elements.
+define <2 x float> @test6_undef(<2 x float> %x) nounwind {
+; CHECK-LABEL: @test6_undef(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], <double 0.000000e+00, double undef>
+; CHECK-NEXT: [[TMP34:%.*]] = fptrunc <2 x double> [[TMP3]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TMP34]]
+;
+entry:
+ %tmp1 = fpext <2 x float> %x to <2 x double>
+ %tmp3 = fadd <2 x double> %tmp1, <double 0.000000e+00, double undef>
+ %tmp34 = fptrunc <2 x double> %tmp3 to <2 x float>
+ ret <2 x float> %tmp34
+}
+
+define <2 x float> @not_half_shrinkable(<2 x float> %x) {
+; CHECK-LABEL: @not_half_shrinkable(
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], <float 0.000000e+00, float 2.049000e+03>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %ext = fpext <2 x float> %x to <2 x double>
+ %add = fadd <2 x double> %ext, <double 0.0, double 2049.0>
+ %r = fptrunc <2 x double> %add to <2 x float>
+ ret <2 x float> %r
+}
+
+define half @test7(float %a) nounwind {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[Z:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: ret half [[Z]]
+;
+ %y = fpext float %a to double
+ %z = fptrunc double %y to half
+ ret half %z
+}
+
+define float @test8(half %a) nounwind {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[Z:%.*]] = fpext half [[A:%.*]] to float
+; CHECK-NEXT: ret float [[Z]]
+;
+ %y = fpext half %a to double
+ %z = fptrunc double %y to float
+ ret float %z
+}
+
+define float @test9(half %x, half %y) nounwind {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fpext half [[Y:%.*]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = fmul float [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext half %y to double
+ %tmp5 = fmul double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test10(half %x, float %y) nounwind {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = fmul float [[TMP0]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext float %y to double
+ %tmp5 = fmul double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test11(half %x) nounwind {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP34:%.*]] = fadd float [[TMP0]], 0.000000e+00
+; CHECK-NEXT: ret float [[TMP34]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp3 = fadd double %tmp1, 0.000000e+00
+ %tmp34 = fptrunc double %tmp3 to float
+ ret float %tmp34
+}
+
+define float @test12(float %x, half %y) nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[Y:%.*]] to float
+; CHECK-NEXT: [[TMP34:%.*]] = fadd float [[TMP0]], [[X:%.*]]
+; CHECK-NEXT: ret float [[TMP34]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp2 = fpext half %y to double
+ %tmp3 = fadd double %tmp1, %tmp2
+ %tmp34 = fptrunc double %tmp3 to float
+ ret float %tmp34
+}
+
+define float @test13(half %x, float %y) nounwind {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = fdiv float [[TMP0]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext float %y to double
+ %tmp5 = fdiv double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test14(float %x, half %y) nounwind {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[Y:%.*]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = fdiv float [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp23 = fpext half %y to double
+ %tmp5 = fdiv double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test15(half %x, half %y) nounwind {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fpext half [[Y:%.*]] to float
+; CHECK-NEXT: [[TMP56:%.*]] = fdiv float [[TMP0]], [[TMP1]]
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext half %y to double
+ %tmp5 = fdiv double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test16(half %x, float %y) nounwind {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[X:%.*]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = frem float [[TMP0]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[TMP1]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext float %y to double
+ %tmp5 = frem double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test17(float %x, half %y) nounwind {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fpext half [[Y:%.*]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = frem float [[X:%.*]], [[TMP0]]
+; CHECK-NEXT: ret float [[TMP1]]
+;
+entry:
+ %tmp1 = fpext float %x to double
+ %tmp23 = fpext half %y to double
+ %tmp5 = frem double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
+
+define float @test18(half %x, half %y) nounwind {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = frem half [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP56:%.*]] = fpext half [[TMP0]] to float
+; CHECK-NEXT: ret float [[TMP56]]
+;
+entry:
+ %tmp1 = fpext half %x to double
+ %tmp23 = fpext half %y to double
+ %tmp5 = frem double %tmp1, %tmp23
+ %tmp56 = fptrunc double %tmp5 to float
+ ret float %tmp56
+}
diff --git a/llvm/test/Transforms/InstCombine/fpextend_x86.ll b/llvm/test/Transforms/InstCombine/fpextend_x86.ll
new file mode 100644
index 00000000000..e012551ebdf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fpextend_x86.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -S | FileCheck %s
+target triple = "x86_64-apple-macosx"
+
+define double @test1(double %a, double %b) nounwind {
+ %wa = fpext double %a to x86_fp80
+ %wb = fpext double %b to x86_fp80
+ %wr = fadd x86_fp80 %wa, %wb
+ %r = fptrunc x86_fp80 %wr to double
+ ret double %r
+; CHECK: test1
+; CHECK: fadd x86_fp80
+; CHECK: ret
+}
+
+define double @test2(double %a, double %b) nounwind {
+ %wa = fpext double %a to x86_fp80
+ %wb = fpext double %b to x86_fp80
+ %wr = fsub x86_fp80 %wa, %wb
+ %r = fptrunc x86_fp80 %wr to double
+ ret double %r
+; CHECK: test2
+; CHECK: fsub x86_fp80
+; CHECK: ret
+}
+
+define double @test3(double %a, double %b) nounwind {
+ %wa = fpext double %a to x86_fp80
+ %wb = fpext double %b to x86_fp80
+ %wr = fmul x86_fp80 %wa, %wb
+ %r = fptrunc x86_fp80 %wr to double
+ ret double %r
+; CHECK: test3
+; CHECK: fmul x86_fp80
+; CHECK: ret
+}
+
+define double @test4(double %a, half %b) nounwind {
+ %wa = fpext double %a to x86_fp80
+ %wb = fpext half %b to x86_fp80
+ %wr = fmul x86_fp80 %wa, %wb
+ %r = fptrunc x86_fp80 %wr to double
+ ret double %r
+; CHECK: test4
+; CHECK: fmul double
+; CHECK: ret
+}
+
+define double @test5(double %a, double %b) nounwind {
+ %wa = fpext double %a to x86_fp80
+ %wb = fpext double %b to x86_fp80
+ %wr = fdiv x86_fp80 %wa, %wb
+ %r = fptrunc x86_fp80 %wr to double
+ ret double %r
+; CHECK: test5
+; CHECK: fdiv x86_fp80
+; CHECK: ret
+}
diff --git a/llvm/test/Transforms/InstCombine/fprintf-1.ll b/llvm/test/Transforms/InstCombine/fprintf-1.ll
new file mode 100644
index 00000000000..cb364102c7f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fprintf-1.ll
@@ -0,0 +1,98 @@
+; Test that the fprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @fprintf(%FILE*, i8*, ...)
+
+; Check fprintf(fp, "foo") -> fwrite("foo", 3, 1, fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify1(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%c", chr) -> fputc(chr, fp).
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify2(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%s", str) -> fputs(str, fp).
+; NOTE: The fputs simplifier simplifies this further to fwrite.
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify3(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, fmt, ...) -> fiprintf(fp, fmt, ...) if no floating point.
+
+define void @test_simplify4(%FILE* %fp) {
+; CHECK-IPRINTF-LABEL: @test_simplify4(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...) @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_simplify5(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify5(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt) [ "deopt"() ]
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp) [ "deopt"() ]
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_no_simplify1(%FILE* %fp) {
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify2(%FILE* %fp, double %d) {
+; CHECK-LABEL: @test_no_simplify2(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define i32 @test_no_simplify3(%FILE* %fp) {
+; CHECK-LABEL: @test_no_simplify3(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ %1 = call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
+ ret i32 %1
+; CHECK-NEXT: ret i32 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/fputs-1.ll b/llvm/test/Transforms/InstCombine/fputs-1.ll
new file mode 100644
index 00000000000..4bf54b155a4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fputs-1.ll
@@ -0,0 +1,43 @@
+; Test that the fputs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@empty = constant [1 x i8] zeroinitializer
+@A = constant [2 x i8] c"A\00"
+@hello = constant [7 x i8] c"hello\0A\00"
+
+declare i32 @fputs(i8*, %FILE*)
+
+; Check fputs(str, fp) --> fwrite(str, strlen(s), 1, fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify1(
+ %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; NOTE: The fwrite simplifier simplifies this further to fputc.
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify2(
+ %str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 65, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr [7 x i8], [7 x i8]* @hello, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/fputs-opt-size.ll b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll
new file mode 100644
index 00000000000..54ac96f9f86
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fputs-opt-size.ll
@@ -0,0 +1,61 @@
+; When optimising for size, we don't want to rewrite fputs to fwrite
+; because it requires more arguments and thus extra MOVs are required.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@.str = private unnamed_addr constant [10 x i8] c"mylog.txt\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str.2 = private unnamed_addr constant [27 x i8] c"Hello world this is a test\00", align 1
+
+define i32 @main() local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i64 @fwrite
+; CHECK: call i32 @fputs
+
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+ %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+ ret i32 0
+}
+
+declare noalias %struct._IO_FILE* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #1
+declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+attributes #0 = { nounwind optsize }
+attributes #1 = { nounwind optsize }
+
+define i32 @main_pgso() local_unnamed_addr !prof !14 {
+entry:
+; PGSO-LABEL: @main_pgso(
+; PGSO-NOT: call i64 @fwrite
+; PGSO: call i32 @fputs
+; NPGSO-LABEL: @main_pgso(
+; NPGSO: call i64 @fwrite
+; NPGSO-NOT: call i32 @fputs
+
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+ %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
new file mode 100644
index 00000000000..88e9eb72630
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -0,0 +1,638 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i33 @llvm.fshr.i33(i33, i33, i33)
+declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
+
+; If the shift mask doesn't include any demanded bits, the funnel shift can be eliminated.
+
+define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
+; CHECK-LABEL: @fshl_mask_simplify1(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %maskedsh = and i32 %sh, 32
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
+ ret i32 %r
+}
+
+define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
+; CHECK-LABEL: @fshr_mask_simplify2(
+; CHECK-NEXT: ret <2 x i32> [[Y:%.*]]
+;
+ %maskedsh = and <2 x i32> %sh, <i32 64, i32 64>
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh)
+ ret <2 x i32> %r
+}
+
+; Negative test.
+
+define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) {
+; CHECK-LABEL: @fshl_mask_simplify3(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 16
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %maskedsh = and i32 %sh, 16
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
+ ret i32 %r
+}
+
+; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
+
+define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
+; CHECK-LABEL: @fshr_mask_simplify1(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 64
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %maskedsh = and i33 %sh, 64
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
+ ret i33 %r
+}
+
+; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
+
+define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
+; CHECK-LABEL: @fshl_mask_simplify2(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i31> [[SH:%.*]], <i31 32, i31 32>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[MASKEDSH]])
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %maskedsh = and <2 x i31> %sh, <i31 32, i31 32>
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %maskedsh)
+ ret <2 x i31> %r
+}
+
+; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
+
+define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
+; CHECK-LABEL: @fshr_mask_simplify3(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and i33 [[SH:%.*]], 32
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 [[MASKEDSH]])
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %maskedsh = and i33 %sh, 32
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %maskedsh)
+ ret i33 %r
+}
+
+; This mask op is unnecessary.
+
+define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
+; CHECK-LABEL: @fshl_mask_not_required(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %maskedsh = and i32 %sh, 31
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
+ ret i32 %r
+}
+
+; This mask op can be reduced.
+
+define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) {
+; CHECK-LABEL: @fshl_mask_reduce_constant(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %maskedsh = and i32 %sh, 33
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
+ ret i32 %r
+}
+
+; But this mask op is required.
+
+define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) {
+; CHECK-LABEL: @fshl_mask_negative(
+; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 15
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %maskedsh = and i32 %sh, 15
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
+ ret i32 %r
+}
+
+; The transform is not limited to mask ops.
+
+define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
+; CHECK-LABEL: @fshr_set_but_not_demanded_vec(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %bogusbits = or <2 x i32> %sh, <i32 32, i32 32>
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %bogusbits)
+ ret <2 x i32> %r
+}
+
+; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
+
+define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
+; CHECK-LABEL: @fshl_set_but_not_demanded_vec(
+; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], <i31 32, i31 32>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> [[BOGUSBITS]])
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %bogusbits = or <2 x i31> %sh, <i31 32, i31 32>
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> %bogusbits)
+ ret <2 x i31> %r
+}
+
+; Simplify one undef or zero operand and constant shift amount.
+
+define i32 @fshl_op0_undef(i32 %x) {
+; CHECK-LABEL: @fshl_op0_undef(
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7)
+ ret i32 %r
+}
+
+define i32 @fshl_op0_zero(i32 %x) {
+; CHECK-LABEL: @fshl_op0_zero(
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7)
+ ret i32 %r
+}
+
+define i33 @fshr_op0_undef(i33 %x) {
+; CHECK-LABEL: @fshr_op0_undef(
+; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7)
+ ret i33 %r
+}
+
+define i33 @fshr_op0_zero(i33 %x) {
+; CHECK-LABEL: @fshr_op0_zero(
+; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7)
+ ret i33 %r
+}
+
+define i32 @fshl_op1_undef(i32 %x) {
+; CHECK-LABEL: @fshl_op1_undef(
+; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7)
+ ret i32 %r
+}
+
+define i32 @fshl_op1_zero(i32 %x) {
+; CHECK-LABEL: @fshl_op1_zero(
+; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7)
+ ret i32 %r
+}
+
+define i33 @fshr_op1_undef(i33 %x) {
+; CHECK-LABEL: @fshr_op1_undef(
+; CHECK-NEXT: [[R:%.*]] = shl i33 [[X:%.*]], 26
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7)
+ ret i33 %r
+}
+
+define i33 @fshr_op1_zero(i33 %x) {
+; CHECK-LABEL: @fshr_op1_zero(
+; CHECK-NEXT: [[R:%.*]] = shl i33 [[X:%.*]], 26
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7)
+ ret i33 %r
+}
+
+define <2 x i31> @fshl_op0_zero_splat_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op0_zero_splat_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 24, i31 24>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>)
+ ret <2 x i31> %r
+}
+
+define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op1_undef_splat_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 7, i31 7>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>)
+ ret <2 x i31> %r
+}
+
+define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op0_undef_splat_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op1_zero_splat_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
+ ret <2 x i32> %r
+}
+
+define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op0_zero_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 30, i31 29>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 -1, i31 33>)
+ ret <2 x i31> %r
+}
+
+define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op1_undef_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 1, i31 2>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 -1, i31 33>)
+ ret <2 x i31> %r
+}
+
+define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op0_undef_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 -1, i32 33>)
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op1_zero_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 33>)
+ ret <2 x i32> %r
+}
+
+; Only demand bits from one of the operands.
+
+define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_only_op0_demanded(
+; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 128
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+ %r = and i32 %z, 128
+ ret i32 %r
+}
+
+define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_only_op1_demanded(
+; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
+; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 63
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+ %r = and i32 %z, 63
+ ret i32 %r
+}
+
+define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_only_op1_demanded(
+; CHECK-NEXT: [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 12392
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
+ %r = and i33 %z, 12392
+ ret i33 %r
+}
+
+define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_only_op0_demanded(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
+; CHECK-NEXT: [[R:%.*]] = and i33 [[TMP1]], 7
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
+ %r = lshr i33 %z, 30
+ ret i33 %r
+}
+
+define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
+; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
+ %r = and <2 x i31> %z, <i31 63, i31 31>
+ ret <2 x i31> %r
+}
+
+define i32 @fshl_constant_shift_amount_modulo_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 1)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 33)
+ ret i32 %r
+}
+
+define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 32)
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
+ ret i33 %r
+}
+
+@external_global = external global i8
+
+define i33 @fshr_constant_shift_amount_modulo_bitwidth_constexpr(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_constexpr(
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 ptrtoint (i8* @external_global to i33))
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %shamt = ptrtoint i8* @external_global to i33
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %shamt)
+ ret i33 %r
+}
+
+define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 30, i32 1>)
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
+ ret <2 x i32> %r
+}
+
+define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 3, i31 1>)
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 -1>)
+ ret <2 x i31> %r
+}
+
+define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %shamt = ptrtoint i8* @external_global to i31
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
+ ret <2 x i31> %r
+}
+
+; The shift modulo bitwidth is the same for all vector elements.
+
+define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
+; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
+ %r = and <2 x i31> %z, <i31 63, i31 31>
+ ret <2 x i31> %r
+}
+
+define i32 @rotl_constant_shift_amount(i32 %x) {
+; CHECK-LABEL: @rotl_constant_shift_amount(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 1)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 33)
+ ret i32 %r
+}
+
+define <2 x i31> @rotl_constant_shift_amount_vec(<2 x i31> %x) {
+; CHECK-LABEL: @rotl_constant_shift_amount_vec(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[X]], <2 x i31> <i31 1, i31 1>)
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %x, <2 x i31> <i31 32, i31 -1>)
+ ret <2 x i31> %r
+}
+
+define i33 @rotr_constant_shift_amount(i33 %x) {
+; CHECK-LABEL: @rotr_constant_shift_amount(
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[X]], i33 32)
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %x, i33 34)
+ ret i33 %r
+}
+
+define <2 x i32> @rotr_constant_shift_amount_vec(<2 x i32> %x) {
+; CHECK-LABEL: @rotr_constant_shift_amount_vec(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 31, i32 1>)
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 33, i32 -1>)
+ ret <2 x i32> %r
+}
+
+; Demand bits from both operands -- cannot simplify.
+
+define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_both_ops_demanded(
+; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 192
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %z = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+ %r = and i32 %z, 192
+ ret i32 %r
+}
+
+define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_both_ops_demanded(
+; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
+; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 192
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 26)
+ %r = and i33 %z, 192
+ ret i33 %r
+}
+
+; Both operands are demanded, but there are known bits.
+
+define i32 @fshl_known_bits(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_known_bits(
+; CHECK-NEXT: ret i32 128
+;
+ %x2 = or i32 %x, 1 ; lo bit set
+ %y2 = lshr i32 %y, 1 ; hi bit clear
+ %z = call i32 @llvm.fshl.i32(i32 %x2, i32 %y2, i32 7)
+ %r = and i32 %z, 192
+ ret i32 %r
+}
+
+define i33 @fshr_known_bits(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_known_bits(
+; CHECK-NEXT: ret i33 128
+;
+ %x2 = or i33 %x, 1 ; lo bit set
+  %y2 = lshr i33 %y, 1 ; hi bit clear
+ %z = call i33 @llvm.fshr.i33(i33 %x2, i33 %y2, i33 26)
+ %r = and i33 %z, 192
+ ret i33 %r
+}
+
+; This case fails to simplify due to multiple uses.
+
+define i33 @fshr_multi_use(i33 %a) {
+; CHECK-LABEL: @fshr_multi_use(
+; CHECK-NEXT: [[B:%.*]] = call i33 @llvm.fshl.i33(i33 [[A:%.*]], i33 [[A]], i33 32)
+; CHECK-NEXT: [[C:%.*]] = lshr i33 [[B]], 23
+; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[B]]
+; CHECK-NEXT: [[E:%.*]] = and i33 [[D]], 31
+; CHECK-NEXT: ret i33 [[E]]
+;
+ %b = tail call i33 @llvm.fshr.i33(i33 %a, i33 %a, i33 1)
+ %c = lshr i33 %b, 23
+ %d = xor i33 %c, %b
+ %e = and i33 %d, 31
+ ret i33 %e
+}
+
+; This demonstrates the same simplification working if the fshr intrinsic
+; is expanded into shifts and or.
+
+define i33 @expanded_fshr_multi_use(i33 %a) {
+; CHECK-LABEL: @expanded_fshr_multi_use(
+; CHECK-NEXT: [[TMP:%.*]] = lshr i33 [[A:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = lshr i33 [[A]], 24
+; CHECK-NEXT: [[D:%.*]] = xor i33 [[C]], [[TMP]]
+; CHECK-NEXT: [[E:%.*]] = and i33 [[D]], 31
+; CHECK-NEXT: ret i33 [[E]]
+;
+ %tmp = lshr i33 %a, 1
+ %tmp2 = shl i33 %a, 32
+ %b = or i33 %tmp, %tmp2
+ %c = lshr i33 %b, 23
+ %d = xor i33 %c, %b
+ %e = and i33 %d, 31
+ ret i33 %e
+}
+
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+
+; Special-case: rotate a 16-bit value left/right by 8-bits is bswap.
+
+define i16 @fshl_bswap(i16 %x) {
+; CHECK-LABEL: @fshl_bswap(
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 8)
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
+ ret i16 %r
+}
+
+define i16 @fshr_bswap(i16 %x) {
+; CHECK-LABEL: @fshr_bswap(
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 8)
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
+ ret i16 %r
+}
+
+define i32 @fshl_mask_args_same1(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same1(
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[A:%.*]], 16
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = and i32 %a, 4294901760 ; 0xffff0000
+ %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 16)
+ ret i32 %tmp2
+}
+
+define i32 @fshl_mask_args_same2(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 65280
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = and i32 %a, 255
+ %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 8)
+ ret i32 %tmp2
+}
+
+define i32 @fshl_mask_args_same3(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same3(
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[A:%.*]], 24
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp1 = and i32 %a, 255
+ %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 24)
+ ret i32 %tmp2
+}
+
+define i32 @fshl_mask_args_different(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_different(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 15
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], 130560
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+  %tmp2 = and i32 %a, 4294901760 ; 0xffff0000
+  %tmp1 = and i32 %a, 4278190080 ; 0xff000000
+ %tmp3 = call i32 @llvm.fshl.i32(i32 %tmp2, i32 %tmp1, i32 17)
+ ret i32 %tmp3
+}
+
+define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
+; CHECK-LABEL: @fshr_mask_args_same_vector(
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i31> [[A:%.*]], <i31 10, i31 10>
+; CHECK-NEXT: ret <2 x i31> [[TMP3]]
+;
+ %tmp1 = and <2 x i31> %a, <i31 1000, i31 1000>
+ %tmp2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
+ %tmp3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %tmp2, <2 x i31> %tmp1, <2 x i31> <i31 10, i31 10>)
+ ret <2 x i31> %tmp3
+}
+
+define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @fshr_mask_args_same_vector2(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = and <2 x i32> %a, <i32 1000000, i32 100000>
+ %tmp2 = and <2 x i32> %a, <i32 6442450943, i32 6442450943>
+ %tmp3 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> <i32 3, i32 3>)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i31> @fshr_mask_args_same_vector3_different_but_still_prunable(<2 x i31> %a) {
+; CHECK-LABEL: @fshr_mask_args_same_vector3_different_but_still_prunable(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i31> [[A:%.*]], <i31 1000, i31 1000>
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[A]], <2 x i31> [[TMP1]], <2 x i31> <i31 10, i31 3>)
+; CHECK-NEXT: ret <2 x i31> [[TMP3]]
+;
+ %tmp1 = and <2 x i31> %a, <i31 1000, i31 1000>
+ %tmp2 = and <2 x i31> %a, <i31 6442450943, i31 6442450943>
+ %tmp3 = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %tmp2, <2 x i31> %tmp1, <2 x i31> <i31 10, i31 3>)
+ ret <2 x i31> %tmp3
+}
diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
new file mode 100644
index 00000000000..4868ece222d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR4374
+
+define float @test1(float %x, float %y) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fsub float -0.000000e+00, [[T1]]
+; CHECK-NEXT: ret float [[T2]]
+;
+ %t1 = fsub float %x, %y
+ %t2 = fsub float -0.0, %t1
+ ret float %t2
+}
+
+; Can't do anything with the test above because -0.0 - 0.0 = -0.0, but if we have nsz:
+; -(X - Y) --> Y - X
+
+define float @neg_sub_nsz(float %x, float %y) {
+; CHECK-LABEL: @neg_sub_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub nsz float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %t1 = fsub float %x, %y
+ %t2 = fsub nsz float -0.0, %t1
+ ret float %t2
+}
+
+; If the subtract has another use, we don't do the transform (even though it
+; doesn't increase the IR instruction count) because we assume that fneg is
+; easier to analyze and generally cheaper than generic fsub.
+
+declare void @use(float)
+declare void @use2(float, double)
+
+define float @neg_sub_nsz_extra_use(float %x, float %y) {
+; CHECK-LABEL: @neg_sub_nsz_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fsub nsz float -0.000000e+00, [[T1]]
+; CHECK-NEXT: call void @use(float [[T1]])
+; CHECK-NEXT: ret float [[T2]]
+;
+ %t1 = fsub float %x, %y
+ %t2 = fsub nsz float -0.0, %t1
+ call void @use(float %t1)
+ ret float %t2
+}
+
+; With nsz: Z - (X - Y) --> Z + (Y - X)
+
+define float @sub_sub_nsz(float %x, float %y, float %z) {
+; CHECK-LABEL: @sub_sub_nsz(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub nsz float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fadd nsz float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret float [[T2]]
+;
+ %t1 = fsub float %x, %y
+ %t2 = fsub nsz float %z, %t1
+ ret float %t2
+}
+
+; With nsz and reassoc: Y - ((X * 5) + Y) --> X * -5
+
+define float @sub_add_neg_x(float %x, float %y) {
+; CHECK-LABEL: @sub_add_neg_x(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], -5.000000e+00
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %mul = fmul float %x, 5.000000e+00
+ %add = fadd float %mul, %y
+ %r = fsub nsz reassoc float %y, %add
+ ret float %r
+}
+
+; Same as above: if 'Z' is not -0.0, swap fsub operands and convert to fadd.
+
+define float @sub_sub_known_not_negzero(float %x, float %y) {
+; CHECK-LABEL: @sub_sub_known_not_negzero(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fadd float [[TMP1]], 4.200000e+01
+; CHECK-NEXT: ret float [[T2]]
+;
+ %t1 = fsub float %x, %y
+ %t2 = fsub float 42.0, %t1
+ ret float %t2
+}
+
+; <rdar://problem/7530098>
+
+define double @test2(double %x, double %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[T1:%.*]] = fadd double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fsub double [[X]], [[T1]]
+; CHECK-NEXT: ret double [[T2]]
+;
+ %t1 = fadd double %x, %y
+ %t2 = fsub double %x, %t1
+ ret double %t2
+}
+
+; X - C --> X + (-C)
+
+define float @constant_op1(float %x, float %y) {
+; CHECK-LABEL: @constant_op1(
+; CHECK-NEXT: [[R:%.*]] = fadd float [[X:%.*]], -4.200000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %r = fsub float %x, 42.0
+ ret float %r
+}
+
+define <2 x float> @constant_op1_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @constant_op1_vec(
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], <float -4.200000e+01, float 4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %r = fsub <2 x float> %x, <float 42.0, float -42.0>
+ ret <2 x float> %r
+}
+
+define <2 x float> @constant_op1_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @constant_op1_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], <float 0x7FF8000000000000, float 4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %r = fsub <2 x float> %x, <float undef, float -42.0>
+ ret <2 x float> %r
+}
+
+; X - (-Y) --> X + Y
+
+define float @neg_op1(float %x, float %y) {
+; CHECK-LABEL: @neg_op1(
+; CHECK-NEXT: [[R:%.*]] = fadd float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %negy = fsub float -0.0, %y
+ %r = fsub float %x, %negy
+ ret float %r
+}
+
+define <2 x float> @neg_op1_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_op1_vec(
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negy = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %r = fsub <2 x float> %x, %negy
+ ret <2 x float> %r
+}
+
+define <2 x float> @neg_op1_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_op1_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negy = fsub <2 x float> <float -0.0, float undef>, %y
+ %r = fsub <2 x float> %x, %negy
+ ret <2 x float> %r
+}
+
+; Similar to above - but look through fpext/fptrunc casts to find the fneg.
+
+define double @neg_ext_op1(float %a, double %b) {
+; CHECK-LABEL: @neg_ext_op1(
+; CHECK-NEXT: [[TMP1:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[T3:%.*]] = fadd double [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret double [[T3]]
+;
+ %t1 = fsub float -0.0, %a
+ %t2 = fpext float %t1 to double
+ %t3 = fsub double %b, %t2
+ ret double %t3
+}
+
+; Verify that vectors work too.
+
+define <2 x float> @neg_trunc_op1(<2 x double> %a, <2 x float> %b) {
+; CHECK-LABEL: @neg_trunc_op1(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc <2 x double> [[A:%.*]] to <2 x float>
+; CHECK-NEXT: [[T3:%.*]] = fadd <2 x float> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x float> [[T3]]
+;
+ %t1 = fsub <2 x double> <double -0.0, double -0.0>, %a
+ %t2 = fptrunc <2 x double> %t1 to <2 x float>
+ %t3 = fsub <2 x float> %b, %t2
+ ret <2 x float> %t3
+}
+
+; No FMF needed, but they should propagate to the fadd.
+
+define double @neg_ext_op1_fast(float %a, double %b) {
+; CHECK-LABEL: @neg_ext_op1_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[T3:%.*]] = fadd fast double [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret double [[T3]]
+;
+ %t1 = fsub float -0.0, %a
+ %t2 = fpext float %t1 to double
+ %t3 = fsub fast double %b, %t2
+ ret double %t3
+}
+
+; Extra use should prevent the transform.
+
+define float @neg_ext_op1_extra_use(half %a, float %b) {
+; CHECK-LABEL: @neg_ext_op1_extra_use(
+; CHECK-NEXT: [[T1:%.*]] = fsub half 0xH8000, [[A:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fpext half [[T1]] to float
+; CHECK-NEXT: [[T3:%.*]] = fsub float [[B:%.*]], [[T2]]
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fsub half -0.0, %a
+ %t2 = fpext half %t1 to float
+ %t3 = fsub float %b, %t2
+ call void @use(float %t2)
+ ret float %t3
+}
+
+; One-use fptrunc is always hoisted above fneg, so the corresponding
+; multi-use bug for fptrunc isn't visible with a fold starting from
+; the last fsub.
+
+define float @neg_trunc_op1_extra_use(double %a, float %b) {
+; CHECK-LABEL: @neg_trunc_op1_extra_use(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[A:%.*]] to float
+; CHECK-NEXT: [[T2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: [[T3:%.*]] = fadd float [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: call void @use(float [[T2]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fsub double -0.0, %a
+ %t2 = fptrunc double %t1 to float
+ %t3 = fsub float %b, %t2
+ call void @use(float %t2)
+ ret float %t3
+}
+
+; Extra uses should prevent the transform.
+
+define float @neg_trunc_op1_extra_uses(double %a, float %b) {
+; CHECK-LABEL: @neg_trunc_op1_extra_uses(
+; CHECK-NEXT: [[T1:%.*]] = fsub double -0.000000e+00, [[A:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fptrunc double [[T1]] to float
+; CHECK-NEXT: [[T3:%.*]] = fsub float [[B:%.*]], [[T2]]
+; CHECK-NEXT: call void @use2(float [[T2]], double [[T1]])
+; CHECK-NEXT: ret float [[T3]]
+;
+ %t1 = fsub double -0.0, %a
+ %t2 = fptrunc double %t1 to float
+ %t3 = fsub float %b, %t2
+ call void @use2(float %t2, double %t1)
+ ret float %t3
+}
+
+; Don't negate a constant expression to form fadd and induce infinite looping:
+; https://bugs.llvm.org/show_bug.cgi?id=37605
+
+@b = external global i16, align 1
+
+define float @PR37605(float %conv) {
+; CHECK-LABEL: @PR37605(
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[CONV:%.*]], bitcast (i32 ptrtoint (i16* @b to i32) to float)
+; CHECK-NEXT: ret float [[SUB]]
+;
+ %sub = fsub float %conv, bitcast (i32 ptrtoint (i16* @b to i32) to float)
+ ret float %sub
+}
+
diff --git a/llvm/test/Transforms/InstCombine/fwrite-1.ll b/llvm/test/Transforms/InstCombine/fwrite-1.ll
new file mode 100644
index 00000000000..10f0b23e1d8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fwrite-1.ll
@@ -0,0 +1,57 @@
+; Test that the fwrite library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@str = constant [1 x i8] zeroinitializer
+@empty = constant [0 x i8] zeroinitializer
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
+
+; Check fwrite(S, 1, 1, fp) -> fputc(S[0], fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify1(
+ %str = getelementptr inbounds [1 x i8], [1 x i8]* @str, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 0, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
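+; Check fwrite(S, 1, 0, fp): nothing is written, so the dead call is removed.
+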
+define void @test_simplify2(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify2(
+ %str = getelementptr inbounds [0 x i8], [0 x i8]* @empty, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 1, i64 0, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
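+; Check fwrite(S, 0, 1, fp): nothing is written, so the dead call is removed.
+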
+define void @test_simplify3(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr inbounds [0 x i8], [0 x i8]* @empty, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 0, i64 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
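+; The return value of fwrite is used, so the call is not converted to fputc.
+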
+define i64 @test_no_simplify1(%FILE* %fp) {
+; CHECK-LABEL: @test_no_simplify1(
+ %str = getelementptr inbounds [1 x i8], [1 x i8]* @str, i64 0, i64 0
+ %ret = call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
+
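+; The size is not a constant, so the number of bytes written is unknown and
+; the call is not simplified.
+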
+define void @test_no_simplify2(%FILE* %fp, i64 %size) {
+; CHECK-LABEL: @test_no_simplify2(
+ %str = getelementptr inbounds [1 x i8], [1 x i8]* @str, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 %size, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/gc.relocate.ll b/llvm/test/Transforms/InstCombine/gc.relocate.ll
new file mode 100644
index 00000000000..78b3b5f42f9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gc.relocate.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+; Uses InstCombine with DataLayout to propagate pointer attributes via
+; gc.relocate: e.g. if the derived pointer is nonnull or dereferenceable(N),
+; then the result of gc.relocate carries the same attribute.
+
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+
+define i32 @explicit_nonnull(i32 addrspace(1)* nonnull %dparam) gc "statepoint-example" {
+; Checks that relocating an explicitly nonnull pointer yields a nonnull
+; result, so the null compare and select fold to a constant.
+; CHECK-LABEL: @explicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+}
+
+define i32 @implicit_nonnull(i32 addrspace(1)* %dparam) gc "statepoint-example" {
+; Checks that a pointer proven nonnull by the dominating null check is still
+; known nonnull after relocation, so the null compare and select fold away.
+; CHECK-LABEL: @implicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %cond = icmp eq i32 addrspace(1)* %dparam, null
+ br i1 %cond, label %no_gc, label %gc
+gc:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+no_gc:
+ unreachable
+}
+
+
+; Make sure we don't crash when processing vectors
+define <2 x i8 addrspace(1)*> @vector(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @vector
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
+ %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
diff --git a/llvm/test/Transforms/InstCombine/gep-addrspace.ll b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
new file mode 100644
index 00000000000..fadf2ae6bf6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-win32"
+
+%myStruct = type { float, [3 x float], [4 x float], i32 }
+
+; make sure that we are not crashing when creating an illegal type
+define void @func(%myStruct addrspace(1)* nocapture %p) nounwind {
+; CHECK-LABEL: @func(
+; CHECK-NEXT: ret void
+;
+ %A = getelementptr inbounds %myStruct, %myStruct addrspace(1)* %p, i64 0
+ %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
+ %C = getelementptr inbounds %myStruct, %myStruct* %B, i32 0, i32 1
+ %D = getelementptr inbounds [3 x float], [3 x float]* %C, i32 0, i32 2
+ %E = load float, float* %D, align 4
+ %F = fsub float %E, undef
+ ret void
+}
+
+@array = internal addrspace(3) global [256 x float] zeroinitializer, align 4
+@scalar = internal addrspace(3) global float 0.000000e+00, align 4
+
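+; The GEPs are rewritten to index the addrspace(3) globals directly, and the
+; addrspacecast is kept on the result.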
+define void @keep_necessary_addrspacecast(i64 %i, float** %out0, float** %out1) {
+; CHECK-LABEL: @keep_necessary_addrspacecast(
+; CHECK-NEXT: [[T01:%.*]] = getelementptr [256 x float], [256 x float] addrspace(3)* @array, i64 0, i64 [[I:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = addrspacecast float addrspace(3)* [[T01]] to float*
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float addrspace(3)* @scalar, i64 [[I]]
+; CHECK-NEXT: [[T1:%.*]] = addrspacecast float addrspace(3)* [[TMP1]] to float*
+; CHECK-NEXT: store float* [[T0]], float** [[OUT0:%.*]], align 4
+; CHECK-NEXT: store float* [[T1]], float** [[OUT1:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %t0 = getelementptr [256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @array to [256 x float]*), i64 0, i64 %i
+ %t1 = getelementptr [0 x float], [0 x float]* addrspacecast (float addrspace(3)* @scalar to [0 x float]*), i64 0, i64 %i
+ store float* %t0, float** %out0, align 4
+ store float* %t1, float** %out1, align 4
+ ret void
+}
+
+declare void @escape_alloca(i16*)
+
+; check that addrspacecast is not ignored (leading to an assertion failure)
+; when trying to mark a GEP as inbounds
+define { i8, i8 } @inbounds_after_addrspacecast() {
+; CHECK-LABEL: @inbounds_after_addrspacecast(
+; CHECK-NEXT: [[T0:%.*]] = alloca i16, align 2
+; CHECK-NEXT: call void @escape_alloca(i16* nonnull [[T0]])
+; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i16* [[T0]] to [2 x i8]*
+; CHECK-NEXT: [[T1:%.*]] = addrspacecast [2 x i8]* [[TMPCAST]] to [2 x i8] addrspace(11)*
+; CHECK-NEXT: [[T2:%.*]] = getelementptr [2 x i8], [2 x i8] addrspace(11)* [[T1]], i64 0, i64 1
+; CHECK-NEXT: [[T3:%.*]] = load i8, i8 addrspace(11)* [[T2]], align 1
+; CHECK-NEXT: [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1
+; CHECK-NEXT: ret { i8, i8 } [[INSERT]]
+;
+ %t0 = alloca i16, align 2
+ call void @escape_alloca(i16* %t0)
+ %tmpcast = bitcast i16* %t0 to [2 x i8]*
+ %t1 = addrspacecast [2 x i8]* %tmpcast to [2 x i8] addrspace(11)*
+ %t2 = getelementptr [2 x i8], [2 x i8] addrspace(11)* %t1, i64 0, i64 1
+ %t3 = load i8, i8 addrspace(11)* %t2, align 1
+ %insert = insertvalue { i8, i8 } zeroinitializer, i8 %t3, 1
+ ret { i8, i8 } %insert
+}
+
+
+declare spir_func <16 x i32> @my_extern_func()
+
+; check that a bitcast is not generated when we need an addrspace cast
+define void @bitcast_after_gep(<16 x i32>* %t0) {
+; CHECK-LABEL: @bitcast_after_gep(
+; CHECK-NEXT: [[T4:%.*]] = addrspacecast <16 x i32>* [[T0:%.*]] to <16 x i32> addrspace(3)*
+; CHECK-NEXT: [[CALL:%.*]] = call spir_func <16 x i32> @my_extern_func()
+; CHECK-NEXT: store <16 x i32> [[CALL]], <16 x i32> addrspace(3)* [[T4]], align 64
+; CHECK-NEXT: ret void
+;
+ %t1 = bitcast <16 x i32>* %t0 to [16 x i32]*
+ %t2 = addrspacecast [16 x i32]* %t1 to [16 x i32] addrspace(3)*
+ %t3 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %t2, i64 0, i64 0
+ %t4 = bitcast i32 addrspace(3)* %t3 to <16 x i32> addrspace(3)*
+ %call = call spir_func <16 x i32> @my_extern_func()
+ store <16 x i32> %call, <16 x i32> addrspace(3)* %t4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
new file mode 100644
index 00000000000..43887caeecb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
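+; The GEP chain in do.body is reassociated so that the loop-invariant offsets
+; (%idx.ext1 and -1) are applied before the loop-varying index.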
+define i32 @foo(i8* nocapture readnone %match, i32 %cur_match, i32 %best_len, i32 %scan_end, i32* nocapture readonly %prev, i32 %limit, i32 %chain_length, i8* nocapture readonly %win, i32 %wmask) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDX_EXT2:%.*]] = zext i32 [[CUR_MATCH:%.*]] to i64
+; CHECK-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[WIN:%.*]], i64 [[IDX_EXT2]]
+; CHECK-NEXT: [[IDX_EXT1:%.*]] = zext i32 [[BEST_LEN:%.*]] to i64
+; CHECK-NEXT: [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR4]], i64 [[IDX_EXT1]]
+; CHECK-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR25]], i64 -1
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[ADD_PTR36]] to i32*
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TMP1]], [[SCAN_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP7]], label [[DO_END:%.*]], label [[IF_THEN_LR_PH:%.*]]
+; CHECK: if.then.lr.ph:
+; CHECK-NEXT: br label [[IF_THEN:%.*]]
+; CHECK: do.body:
+; CHECK-NEXT: [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]]
+; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 -1
+; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR2]], i64 [[IDX_EXT]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32*
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]]
+; CHECK-NEXT: br i1 [[CMP]], label [[DO_END]], label [[IF_THEN]]
+; CHECK: if.then:
+; CHECK-NEXT: [[CUR_MATCH_ADDR_09:%.*]] = phi i32 [ [[CUR_MATCH]], [[IF_THEN_LR_PH]] ], [ [[TMP4]], [[DO_BODY:%.*]] ]
+; CHECK-NEXT: [[CHAIN_LENGTH_ADDR_08:%.*]] = phi i32 [ [[CHAIN_LENGTH:%.*]], [[IF_THEN_LR_PH]] ], [ [[DEC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[CUR_MATCH_ADDR_09]], [[WMASK:%.*]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREV:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP4]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP4]], [[LIMIT:%.*]]
+; CHECK-NEXT: br i1 [[CMP4]], label [[LAND_LHS_TRUE:%.*]], label [[DO_END]]
+; CHECK: land.lhs.true:
+; CHECK-NEXT: [[DEC]] = add i32 [[CHAIN_LENGTH_ADDR_08]], -1
+; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP5]], label [[DO_END]], label [[DO_BODY]]
+; CHECK: do.end:
+; CHECK-NEXT: [[CONT_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 0, [[IF_THEN]] ], [ 0, [[LAND_LHS_TRUE]] ], [ 1, [[DO_BODY]] ]
+; CHECK-NEXT: ret i32 [[CONT_0]]
+;
+entry:
+ %idx.ext2 = zext i32 %cur_match to i64
+ %add.ptr4 = getelementptr inbounds i8, i8* %win, i64 %idx.ext2
+ %idx.ext1 = zext i32 %best_len to i64
+ %add.ptr25 = getelementptr inbounds i8, i8* %add.ptr4, i64 %idx.ext1
+ %add.ptr36 = getelementptr inbounds i8, i8* %add.ptr25, i64 -1
+ %0 = bitcast i8* %add.ptr36 to i32*
+ %1 = load i32, i32* %0, align 4
+ %cmp7 = icmp eq i32 %1, %scan_end
+ br i1 %cmp7, label %do.end, label %if.then.lr.ph
+
+if.then.lr.ph: ; preds = %entry
+ br label %if.then
+
+do.body: ; preds = %land.lhs.true
+ %chain_length.addr.0 = phi i32 [ %dec, %land.lhs.true ]
+ %cur_match.addr.0 = phi i32 [ %4, %land.lhs.true ]
+ %idx.ext = zext i32 %cur_match.addr.0 to i64
+ %add.ptr = getelementptr inbounds i8, i8* %win, i64 %idx.ext
+ %add.ptr2 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext1
+ %add.ptr3 = getelementptr inbounds i8, i8* %add.ptr2, i64 -1
+ %2 = bitcast i8* %add.ptr3 to i32*
+ %3 = load i32, i32* %2, align 4
+ %cmp = icmp eq i32 %3, %scan_end
+ br i1 %cmp, label %do.end, label %if.then
+
+if.then: ; preds = %if.then.lr.ph, %do.body
+ %cur_match.addr.09 = phi i32 [ %cur_match, %if.then.lr.ph ], [ %cur_match.addr.0, %do.body ]
+ %chain_length.addr.08 = phi i32 [ %chain_length, %if.then.lr.ph ], [ %chain_length.addr.0, %do.body ]
+ %and = and i32 %cur_match.addr.09, %wmask
+ %idxprom = zext i32 %and to i64
+ %arrayidx = getelementptr inbounds i32, i32* %prev, i64 %idxprom
+ %4 = load i32, i32* %arrayidx, align 4
+ %cmp4 = icmp ugt i32 %4, %limit
+ br i1 %cmp4, label %land.lhs.true, label %do.end
+
+land.lhs.true: ; preds = %if.then
+ %dec = add i32 %chain_length.addr.08, -1
+ %cmp5 = icmp eq i32 %dec, 0
+ br i1 %cmp5, label %do.end, label %do.body
+
+do.end: ; preds = %do.body, %land.lhs.true, %if.then, %entry
+ %cont.0 = phi i32 [ 1, %entry ], [ 0, %if.then ], [ 0, %land.lhs.true ], [ 1, %do.body ]
+ ret i32 %cont.0
+}
+
+declare void @blackhole(<2 x i8*>)
+
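+; The constant offset is applied to the scalar base pointer first, so that GEP
+; stays scalar and loop-invariant.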
+define void @PR37005(i8* %base, i8** %in) {
+; CHECK-LABEL: @PR37005(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
+; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
+; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], <i64 21, i64 21>
+; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], <i64 7, i64 7>
+; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 80
+; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, i8* [[E51]], <2 x i64> [[SL1]]
+; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
+ %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
+ %bc1 = bitcast i8** %e2 to <2 x i8*>*
+ %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
+ %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
+ %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
+ %lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
+ %sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
+ %e5 = getelementptr inbounds i8, i8* %base, <2 x i64> %sl1
+ %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
+ call void @blackhole(<2 x i8*> %e6)
+ br label %loop
+}
+
+define void @PR37005_2(i8* %base, i8** %in) {
+; CHECK-LABEL: @PR37005_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
+; CHECK-NEXT: [[PI1:%.*]] = ptrtoint i8** [[E2]] to i64
+; CHECK-NEXT: [[LR1:%.*]] = lshr i64 [[PI1]], 21
+; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw i64 [[LR1]], 7
+; CHECK-NEXT: [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], <2 x i64> <i64 80, i64 60>
+; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E51]], i64 [[SL1]]
+; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
+ %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
+ %pi1 = ptrtoint i8** %e2 to i64
+ %lr1 = lshr i64 %pi1, 21
+ %sl1 = shl nuw nsw i64 %lr1, 7
+ %e5 = getelementptr inbounds i8, i8* %base, i64 %sl1
+ %e6 = getelementptr inbounds i8, i8* %e5, <2 x i64> <i64 80, i64 60>
+ call void @blackhole(<2 x i8*> %e6)
+ br label %loop
+}
+
+define void @PR37005_3(<2 x i8*> %base, i8** %in) {
+; CHECK-LABEL: @PR37005_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
+; CHECK-NEXT: [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
+; CHECK-NEXT: [[LR1:%.*]] = lshr <2 x i64> [[PI1]], <i64 21, i64 21>
+; CHECK-NEXT: [[SL1:%.*]] = shl nuw nsw <2 x i64> [[LR1]], <i64 7, i64 7>
+; CHECK-NEXT: [[E5:%.*]] = getelementptr inbounds i8, <2 x i8*> [[BASE:%.*]], i64 80
+; CHECK-NEXT: [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E5]], <2 x i64> [[SL1]]
+; CHECK-NEXT: call void @blackhole(<2 x i8*> [[E6]])
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
+ %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
+ %bc1 = bitcast i8** %e2 to <2 x i8*>*
+ %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
+ %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
+ %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
+ %lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
+ %sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
+ %e5 = getelementptr inbounds i8, <2 x i8*> %base, <2 x i64> %sl1
+ %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
+ call void @blackhole(<2 x i8*> %e6)
+ br label %loop
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll
new file mode 100644
index 00000000000..e22653042a3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32"
+
+%struct.B = type { double }
+%struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
+
+
+@Global = external global [10 x i8]
+
+; Test that two array indexing geps fold
+define i32* @test1(i32* %I) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 21
+; CHECK-NEXT: ret i32* [[B]]
+;
+ %A = getelementptr i32, i32* %I, i8 17
+ %B = getelementptr i32, i32* %A, i16 4
+ ret i32* %B
+}
+
+; Test that two getelementptr insts fold
+define i32* @test2({ i32 }* %I) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = getelementptr { i32 }, { i32 }* [[I:%.*]], i32 1, i32 0
+; CHECK-NEXT: ret i32* [[B]]
+;
+ %A = getelementptr { i32 }, { i32 }* %I, i32 1
+ %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0
+ ret i32* %B
+}
+
+define void @test3(i8 %B) {
+; This should be turned into a constexpr instead of being an instruction
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: store i8 [[B:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i32 0, i32 4), align 1
+; CHECK-NEXT: ret void
+;
+ %A = getelementptr [10 x i8], [10 x i8]* @Global, i32 0, i32 4
+ store i8 %B, i8* %A
+ ret void
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT: store i32 addrspace(2)* [[B:%.*]], i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, [[AS2_PTR_STRUCT:%.*]] addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0), align 8
+; CHECK-NEXT: ret void
+;
+ %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0
+ store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+ ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT: store i8 addrspace(2)* [[B:%.*]], i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i32 0, i32 2), align 16
+; CHECK-NEXT: ret void
+;
+
+ %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+ store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+ ret void
+}
+
+define i32* @test4(i32* %I, i32 %C, i32 %D) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 [[C:%.*]]
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[A]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32* [[B]]
+;
+ %A = getelementptr i32, i32* %I, i32 %C
+ %B = getelementptr i32, i32* %A, i32 %D
+ ret i32* %B
+}
+
+
+define i1 @test5({ i32, i32 }* %x, { i32, i32 }* %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP_4:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP_4]]
+;
+ %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+ %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
+ ;; seteq x, y
+ %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
+ ret i1 %tmp.4
+}
+
+%S = type { i32, [ 100 x i32] }
+
+define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i32> %X
+ %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 0, i32 0>
+ %C = icmp eq <2 x i32*> %A, %B
+ ret <2 x i1> %C
+}
+
+@G = external global [3 x i8]
+define i8* @test7(i16 %Idx) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[ZE_IDX:%.*]] = zext i16 [[IDX:%.*]] to i32
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr [3 x i8], [3 x i8]* @G, i32 0, i32 [[ZE_IDX]]
+; CHECK-NEXT: ret i8* [[TMP]]
+;
+ %ZE_Idx = zext i16 %Idx to i32
+ %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32 %ZE_Idx
+ ret i8* %tmp
+}
+
+
+; Test folding of constantexpr geps into normal geps.
+@Array = external global [40 x i32]
+define i32 *@test8(i32 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[A:%.*]] = getelementptr [40 x i32], [40 x i32]* @Array, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT: ret i32* [[A]]
+;
+ %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i32 0, i32 0), i32 %X
+ ret i32* %A
+}
+
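+; The i8 index is canonicalized to the 32-bit index width given by the custom
+; datalayout.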
+define i32 *@test9(i32 *%base, i8 %ind) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[IND:%.*]] to i32
+; CHECK-NEXT: [[RES:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32* [[RES]]
+;
+ %res = getelementptr i32, i32 *%base, i8 %ind
+ ret i32* %res
+}
+
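+; A field offset computed from a null base folds to a constant.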
+define i32 @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 8
+;
+ %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1
+ %B = ptrtoint double* %A to i32
+ ret i32 %B
+}
diff --git a/llvm/test/Transforms/InstCombine/gep-sext.ll b/llvm/test/Transforms/InstCombine/gep-sext.ll
new file mode 100644
index 00000000000..36e2aeff02c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-sext.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-win32"
+
+declare void @use(i32) readonly
+
+; We prefer to canonicalize GEP indices to the machine pointer width early
+define void @test(i32* %p, i32 %index) {
+; CHECK-LABEL: @test
+; CHECK-NEXT: %1 = sext i32 %index to i64
+; CHECK-NEXT: %addr = getelementptr i32, i32* %p, i64 %1
+ %addr = getelementptr i32, i32* %p, i32 %index
+ %val = load i32, i32* %addr
+ call void @use(i32 %val)
+ ret void
+}
+; If they've already been canonicalized via zext, that's fine
+define void @test2(i32* %p, i32 %index) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %i = zext i32 %index to i64
+; CHECK-NEXT: %addr = getelementptr i32, i32* %p, i64 %i
+ %i = zext i32 %index to i64
+ %addr = getelementptr i32, i32* %p, i64 %i
+ %val = load i32, i32* %addr
+ call void @use(i32 %val)
+ ret void
+}
+; If we can use a zext, we prefer that. This requires
+; knowing that the index is positive.
+define void @test3(i32* %p, i32 %index) {
+; CHECK-LABEL: @test3
+; CHECK: zext
+; CHECK-NOT: sext
+ %addr_begin = getelementptr i32, i32* %p, i64 40
+ %addr_fixed = getelementptr i32, i32* %addr_begin, i64 48
+ %val_fixed = load i32, i32* %addr_fixed, !range !0
+ %addr = getelementptr i32, i32* %addr_begin, i32 %val_fixed
+ %val = load i32, i32* %addr
+ call void @use(i32 %val)
+ ret void
+}
+; Replace sext with zext where possible
+define void @test4(i32* %p, i32 %index) {
+; CHECK-LABEL: @test4
+; CHECK: zext
+; CHECK-NOT: sext
+ %addr_begin = getelementptr i32, i32* %p, i64 40
+ %addr_fixed = getelementptr i32, i32* %addr_begin, i64 48
+ %val_fixed = load i32, i32* %addr_fixed, !range !0
+ %i = sext i32 %val_fixed to i64
+ %addr = getelementptr i32, i32* %addr_begin, i64 %i
+ %val = load i32, i32* %addr
+ call void @use(i32 %val)
+ ret void
+}
+
+;; !range !0
+!0 = !{i32 0, i32 2147483647}
+
+
+
diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll
new file mode 100644
index 00000000000..c0db01eab5b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-vector.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine %s -S | FileCheck %s
+
+@block = global [64 x [8192 x i8]] zeroinitializer, align 1
+
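+; Scalar GEP operands are splatted to vectors, and the out-of-range inner
+; index (8192) is carried into the outer array dimension.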
+define <2 x i8*> @vectorindex1() {
+; CHECK-LABEL: @vectorindex1(
+; CHECK-NEXT: ret <2 x i8*> getelementptr inbounds ([64 x [8192 x i8]], [64 x [8192 x i8]]* @block, <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>, <2 x i64> zeroinitializer)
+;
+ %1 = getelementptr inbounds [64 x [8192 x i8]], [64 x [8192 x i8]]* @block, i64 0, <2 x i64> <i64 0, i64 1>, i64 8192
+ ret <2 x i8*> %1
+}
+
+define <2 x i8*> @vectorindex2() {
+; CHECK-LABEL: @vectorindex2(
+; CHECK-NEXT: ret <2 x i8*> getelementptr inbounds ([64 x [8192 x i8]], [64 x [8192 x i8]]* @block, <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>, <2 x i64> <i64 8191, i64 1>)
+;
+ %1 = getelementptr inbounds [64 x [8192 x i8]], [64 x [8192 x i8]]* @block, i64 0, i64 1, <2 x i64> <i64 8191, i64 8193>
+ ret <2 x i8*> %1
+}
+
+define <2 x i8*> @vectorindex3() {
+; CHECK-LABEL: @vectorindex3(
+; CHECK-NEXT: ret <2 x i8*> getelementptr inbounds ([64 x [8192 x i8]], [64 x [8192 x i8]]* @block, <2 x i64> zeroinitializer, <2 x i64> <i64 0, i64 2>, <2 x i64> <i64 8191, i64 1>)
+;
+ %1 = getelementptr inbounds [64 x [8192 x i8]], [64 x [8192 x i8]]* @block, i64 0, <2 x i64> <i64 0, i64 1>, <2 x i64> <i64 8191, i64 8193>
+ ret <2 x i8*> %1
+}
+
+define i32* @bitcast_vec_to_array_gep(<7 x i32>* %x, i64 %y, i64 %z) {
+; CHECK-LABEL: @bitcast_vec_to_array_gep(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]
+; CHECK-NEXT: ret i32* [[GEP]]
+;
+ %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]*
+ %gep = getelementptr [7 x i32], [7 x i32]* %arr_ptr, i64 %y, i64 %z
+ ret i32* %gep
+}
+
+define i32* @bitcast_array_to_vec_gep([3 x i32]* %x, i64 %y, i64 %z) {
+; CHECK-LABEL: @bitcast_array_to_vec_gep(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]
+; CHECK-NEXT: ret i32* [[GEP]]
+;
+ %vec_ptr = bitcast [3 x i32]* %x to <3 x i32>*
+ %gep = getelementptr inbounds <3 x i32>, <3 x i32>* %vec_ptr, i64 %y, i64 %z
+ ret i32* %gep
+}
+
+define i32 addrspace(3)* @bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) {
+; CHECK-LABEL: @bitcast_vec_to_array_addrspace(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)*
+; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]]
+;
+ %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]*
+ %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)*
+ %gep = getelementptr [7 x i32], [7 x i32] addrspace(3)* %asc, i64 %y, i64 %z
+ ret i32 addrspace(3)* %gep
+}
+
+define i32 addrspace(3)* @inbounds_bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) {
+; CHECK-LABEL: @inbounds_bitcast_vec_to_array_addrspace(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)*
+; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]]
+;
+ %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]*
+ %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)*
+ %gep = getelementptr inbounds [7 x i32], [7 x i32] addrspace(3)* %asc, i64 %y, i64 %z
+ ret i32 addrspace(3)* %gep
+}
diff --git a/llvm/test/Transforms/InstCombine/gepgep.ll b/llvm/test/Transforms/InstCombine/gepgep.ll
new file mode 100644
index 00000000000..24b81aaea3f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gepgep.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -instcombine -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@buffer = external global [64 x float]
+
+declare void @use(i8*)
+
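+; No FileCheck here: this only verifies that instcombine handles the nested
+; constant-expression GEP without crashing.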
+define void @f() {
+ call void @use(i8* getelementptr (i8, i8* getelementptr (i8, i8* bitcast ([64 x float]* @buffer to i8*), i64 and (i64 sub (i64 0, i64 ptrtoint ([64 x float]* @buffer to i64)), i64 63)), i64 64))
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/gepphigep.ll b/llvm/test/Transforms/InstCombine/gepphigep.ll
new file mode 100644
index 00000000000..cc90d714be7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gepphigep.ll
@@ -0,0 +1,186 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct1 = type { %struct2*, i32, i32, i32 }
+%struct2 = type { i32, i32 }
+%struct3 = type { i32, %struct4, %struct4 }
+%struct4 = type { %struct2, %struct2 }
+
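+; The GEPs feeding the phi are folded into single GEPs off %tmp1, so only the
+; i64 index needs to be phi'd.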
+define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1, %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2*, %struct2** %tmp, align 8
+ br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+ %tmp10 = getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2, %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ br label %bb3
+
+bb2:
+ %tmp20 = getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2, %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ br label %bb3
+
+bb3:
+ %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ]
+ %tmp24 = getelementptr inbounds %struct2, %struct2* %phi, i64 0, i32 1
+ %tmp25 = load i32, i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test1(
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ]
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %[[PHI]], i32 1
+
+}
+
+define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1, %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2*, %struct2** %tmp, align 8
+ %tmp10 = getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2, %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ %tmp20 = getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2, %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ %tmp24 = getelementptr inbounds %struct2, %struct2* %tmp10, i64 0, i32 1
+ %tmp25 = load i32, i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test2(
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9, i32 1
+}
+
+; Check that instcombine doesn't insert GEPs before landingpad.
+
+define i32 @test3(%struct3* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19, i64 %tmp20, i64 %tmp21) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+bb:
+ %tmp = getelementptr inbounds %struct3, %struct3* %dm, i64 0
+ br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+ %tmp1 = getelementptr inbounds %struct3, %struct3* %tmp, i64 %tmp19, i32 1
+ %tmp11 = getelementptr inbounds %struct4, %struct4* %tmp1, i64 0, i32 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ br label %bb3
+
+bb2:
+ %tmp2 = getelementptr inbounds %struct3, %struct3* %tmp, i64 %tmp20, i32 1
+ %tmp12 = getelementptr inbounds %struct4, %struct4* %tmp2, i64 0, i32 0, i32 1
+ store i32 0, i32* %tmp12, align 4
+ br label %bb3
+
+bb3:
+ %phi = phi %struct4* [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+ %tmp22 = invoke i32 @foo1(i32 11) to label %bb4 unwind label %bb5
+
+bb4:
+ ret i32 0
+
+bb5:
+ %tmp27 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp34 = getelementptr inbounds %struct4, %struct4* %phi, i64 %tmp21, i32 1
+ %tmp35 = getelementptr inbounds %struct2, %struct2* %tmp34, i64 0, i32 1
+ %tmp25 = load i32, i32* %tmp35, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test3(
+; CHECK: bb5:
+; CHECK-NEXT: {{.*}}landingpad { i8*, i32 }
+}
+
+@_ZTIi = external constant i8*
+declare i32 @__gxx_personality_v0(...)
+declare i32 @foo1(i32)
+
+
+; Check that instcombine doesn't fold GEPs into themselves through a loop
+; back-edge.
+
+define i8* @test4(i32 %value, i8* %buffer) {
+entry:
+ %incptr = getelementptr inbounds i8, i8* %buffer, i64 1
+ %cmp = icmp ugt i32 %value, 127
+ br i1 %cmp, label %loop.header, label %exit
+
+loop.header:
+ br label %loop.body
+
+loop.body:
+ %loopptr = phi i8* [ %incptr, %loop.header ], [ %incptr2, %loop.body ]
+ %newval = phi i32 [ %value, %loop.header ], [ %shr, %loop.body ]
+ %shr = lshr i32 %newval, 7
+ %incptr2 = getelementptr inbounds i8, i8* %loopptr, i64 1
+ %cmp2 = icmp ugt i32 %shr, 127
+ br i1 %cmp2, label %loop.body, label %loop.exit
+
+loop.exit:
+ %exitptr = phi i8* [ %incptr2, %loop.body ]
+ br label %exit
+
+exit:
+ %ptr2 = phi i8* [ %exitptr, %loop.exit ], [ %incptr, %entry ]
+ %incptr3 = getelementptr inbounds i8, i8* %ptr2, i64 1
+ ret i8* %incptr3
+
+; CHECK-LABEL: @test4(
+; CHECK: loop.body:
+; CHECK: getelementptr{{.*}}i64 1
+; CHECK: exit:
+}
+
+@.str.4 = external unnamed_addr constant [100 x i8], align 1
+
+; Instcombine shouldn't add new PHI nodes while folding GEPs if that would
+; leave the old PHI nodes behind, as this is not clearly beneficial.
+; CHECK-LABEL: @test5(
+define void @test5(i16 *%idx, i8 **%in) #0 {
+entry:
+ %0 = load i8*, i8** %in
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 1
+ %1 = load i8, i8* %incdec.ptr, align 1
+ %cmp23 = icmp eq i8 %1, 54
+ br i1 %cmp23, label %while.cond, label %if.then.25
+
+if.then.25:
+ call void @g(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @.str.4, i32 0, i32 0))
+ br label %while.cond
+
+while.cond:
+; CHECK-LABEL: while.cond
+; CHECK-NOT: phi i8* [ %0, %entry ], [ %Ptr, %while.body ], [ %0, %if.then.25 ]
+ %Ptr = phi i8* [ %incdec.ptr, %entry ], [ %incdec.ptr32, %while.body], [%incdec.ptr, %if.then.25 ]
+ %2 = load i8, i8* %Ptr
+ %and = and i8 %2, 64
+ %lnot = icmp eq i8 %and, 0
+ br i1 %lnot, label %while.body, label %while.cond.33
+
+while.body:
+ %incdec.ptr32 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond
+
+while.cond.33:
+ %incdec.ptr34 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond.57
+
+while.cond.57:
+ %3 = load i8, i8* %incdec.ptr34, align 1
+ %conv59 = zext i8 %3 to i32
+ %arrayidx61 = getelementptr inbounds i16, i16* %idx, i32 %conv59
+ %4 = load i16, i16* %arrayidx61, align 2
+ %and63 = and i16 %4, 2048
+ %tobool64 = icmp eq i16 %and63, 0
+ br i1 %tobool64, label %while.cond.73, label %while.cond.57
+
+while.cond.73:
+ br label %while.cond.73
+
+}
+
+declare void @g(i8*)
diff --git a/llvm/test/Transforms/InstCombine/getelementptr-folding.ll b/llvm/test/Transforms/InstCombine/getelementptr-folding.ll
new file mode 100644
index 00000000000..11e7e43a6b4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/getelementptr-folding.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct.matrix_float3x3 = type { [3 x <3 x float>] }
+
+; We used to fold this by rewriting the indices to 0, 0, 2, 0. This is
+; invalid because there are 4 bytes of padding after each <3 x float> field.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+@matrix_identity_float3x3 = external global %struct.matrix_float3x3, align 16
+@bbb = global float* getelementptr inbounds (%struct.matrix_float3x3, %struct.matrix_float3x3* @matrix_identity_float3x3, i64 0, i32 0, i64 1, i64 3)
+; CHECK: @bbb = global float* getelementptr inbounds (%struct.matrix_float3x3, %struct.matrix_float3x3* @matrix_identity_float3x3, i64 0, i32 0, i64 1, i64 3)
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
new file mode 100644
index 00000000000..566e15f2893
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -0,0 +1,945 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
+
+%intstruct = type { i32 }
+%pair = type { i32, i32 }
+%struct.B = type { double }
+%struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
+
+
+@Global = external global [10 x i8]
+@Global_as1 = external addrspace(1) global [10 x i8]
+
+; Test noop elimination
+define i32* @test1(i32* %I) {
+ %A = getelementptr i32, i32* %I, i64 0
+ ret i32* %A
+; CHECK-LABEL: @test1(
+; CHECK: ret i32* %I
+}
+
+define i32 addrspace(1)* @test1_as1(i32 addrspace(1)* %I) {
+ %A = getelementptr i32, i32 addrspace(1)* %I, i64 0
+ ret i32 addrspace(1)* %A
+; CHECK-LABEL: @test1_as1(
+; CHECK: ret i32 addrspace(1)* %I
+}
+
+; Test noop elimination
+define i32* @test2(i32* %I) {
+ %A = getelementptr i32, i32* %I
+ ret i32* %A
+; CHECK-LABEL: @test2(
+; CHECK: ret i32* %I
+}
+
+; Test that two array indexing geps fold
+define i32* @test3(i32* %I) {
+ %A = getelementptr i32, i32* %I, i64 17
+ %B = getelementptr i32, i32* %A, i64 4
+ ret i32* %B
+; CHECK-LABEL: @test3(
+; CHECK: getelementptr i32, i32* %I, i64 21
+}
+
+; Test that two getelementptr insts fold
+define i32* @test4({ i32 }* %I) {
+ %A = getelementptr { i32 }, { i32 }* %I, i64 1
+ %B = getelementptr { i32 }, { i32 }* %A, i64 0, i32 0
+ ret i32* %B
+; CHECK-LABEL: @test4(
+; CHECK: getelementptr { i32 }, { i32 }* %I, i64 1, i32 0
+}
+
+define void @test5(i8 %B) {
+ ; This should be turned into a constexpr instead of being an instruction
+ %A = getelementptr [10 x i8], [10 x i8]* @Global, i64 0, i64 4
+ store i8 %B, i8* %A
+ ret void
+; CHECK-LABEL: @test5(
+; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i64 0, i64 4)
+}
+
+define void @test5_as1(i8 %B) {
+ ; This should be turned into a constexpr instead of being an instruction
+ %A = getelementptr [10 x i8], [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4
+ store i8 %B, i8 addrspace(1)* %A
+ ret void
+; CHECK-LABEL: @test5_as1(
+; CHECK: store i8 %B, i8 addrspace(1)* getelementptr inbounds ([10 x i8], [10 x i8] addrspace(1)* @Global_as1, i16 0, i16 4)
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT: store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0), align 8
+; CHECK-NEXT: ret void
+ %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i16 0, i32 0
+ store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+ ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT: store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2), align 4
+
+; CHECK-NEXT: ret void
+ %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+ store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+ ret void
+}
+
+define i32* @test7(i32* %I, i64 %C, i64 %D) {
+ %A = getelementptr i32, i32* %I, i64 %C
+ %B = getelementptr i32, i32* %A, i64 %D
+ ret i32* %B
+; CHECK-LABEL: @test7(
+; CHECK: %A = getelementptr i32, i32* %I, i64 %C
+; CHECK: %B = getelementptr i32, i32* %A, i64 %D
+}
+
+define i8* @test8([10 x i32]* %X) {
+ ;; Fold into the cast.
+ %A = getelementptr [10 x i32], [10 x i32]* %X, i64 0, i64 0
+ %B = bitcast i32* %A to i8*
+ ret i8* %B
+; CHECK-LABEL: @test8(
+; CHECK: bitcast [10 x i32]* %X to i8*
+}
+
+define i32 @test9() {
+ %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1
+ %B = ptrtoint double* %A to i32
+ ret i32 %B
+; CHECK-LABEL: @test9(
+; CHECK: ret i32 8
+}
+
+define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
+ %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+ %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
+ ;; seteq x, y
+ %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
+ ret i1 %tmp.4
+; CHECK-LABEL: @test10(
+; CHECK: icmp eq { i32, i32 }* %x, %y
+}
+
+define i1 @test11({ i32, i32 }* %X) {
+ %P = getelementptr { i32, i32 }, { i32, i32 }* %X, i32 0, i32 0
+ %Q = icmp eq i32* %P, null
+ ret i1 %Q
+; CHECK-LABEL: @test11(
+; CHECK: icmp eq { i32, i32 }* %X, null
+}
+
+
+; PR4748
+define i32 @test12(%struct.A* %a) {
+entry:
+ %g3 = getelementptr %struct.A, %struct.A* %a, i32 0, i32 1
+ store i32 10, i32* %g3, align 4
+
+ %g4 = getelementptr %struct.A, %struct.A* %a, i32 0, i32 0
+
+ %new_a = bitcast %struct.B* %g4 to %struct.A*
+
+ %g5 = getelementptr %struct.A, %struct.A* %new_a, i32 0, i32 1
+ %a_a = load i32, i32* %g5, align 4
+ ret i32 %a_a
+; CHECK-LABEL: @test12(
+; CHECK: getelementptr %struct.A, %struct.A* %a, i64 0, i32 1
+; CHECK-NEXT: store i32 10, i32* %g3
+; CHECK-NEXT: ret i32 10
+}
+
+
+; PR2235
+%S = type { i32, [ 100 x i32] }
+define i1 @test13(i64 %X, %S* %P) {
+ %A = getelementptr inbounds %S, %S* %P, i32 0, i32 1, i64 %X
+ %B = getelementptr inbounds %S, %S* %P, i32 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+; CHECK-LABEL: @test13(
+; CHECK: %C = icmp eq i64 %X, -1
+}
+
+; This is a test of icmp + shl nuw in disguise - 4611686018427387903 is 0x3fffffffffffffff.
+define <2 x i1> @test13_vector(<2 x i64> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test13_vector(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> %X, <i64 4611686018427387903, i64 4611686018427387903>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i64> %X
+ %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i64> <i64 0, i64 0>, <2 x i32> <i32 0, i32 0>
+ %C = icmp eq <2 x i32*> %A, %B
+ ret <2 x i1> %C
+}
+
+define i1 @test13_as1(i16 %X, %S addrspace(1)* %P) {
+; CHECK-LABEL: @test13_as1(
+; CHECK-NEXT: %C = icmp eq i16 %X, -1
+; CHECK-NEXT: ret i1 %C
+ %A = getelementptr inbounds %S, %S addrspace(1)* %P, i16 0, i32 1, i16 %X
+ %B = getelementptr inbounds %S, %S addrspace(1)* %P, i16 0, i32 0
+ %C = icmp eq i32 addrspace(1)* %A, %B
+ ret i1 %C
+}
+
+; This is a test of icmp + shl nuw in disguise - 16383 is 0x3fff.
+define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x %S addrspace(1)*> %P) {
+; CHECK-LABEL: @test13_vector_as1(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i16> %X, <i16 16383, i16 16383>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = getelementptr inbounds %S, <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X
+ %B = getelementptr inbounds %S, <2 x %S addrspace(1)*> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 0, i32 0>
+ %C = icmp eq <2 x i32 addrspace(1)*> %A, %B
+ ret <2 x i1> %C
+}
+
+define i1 @test13_i32(i32 %X, %S* %P) {
+; CHECK-LABEL: @test13_i32(
+; CHECK: %C = icmp eq i32 %X, -1
+ %A = getelementptr inbounds %S, %S* %P, i32 0, i32 1, i32 %X
+ %B = getelementptr inbounds %S, %S* %P, i32 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+}
+
+define i1 @test13_i16(i16 %X, %S* %P) {
+; CHECK-LABEL: @test13_i16(
+; CHECK: %C = icmp eq i16 %X, -1
+ %A = getelementptr inbounds %S, %S* %P, i16 0, i32 1, i16 %X
+ %B = getelementptr inbounds %S, %S* %P, i16 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+}
+
+define i1 @test13_i128(i128 %X, %S* %P) {
+; CHECK-LABEL: @test13_i128(
+; CHECK: %C = icmp eq i64 %1, -1
+ %A = getelementptr inbounds %S, %S* %P, i128 0, i32 1, i128 %X
+ %B = getelementptr inbounds %S, %S* %P, i128 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+}
+
+
+@G = external global [3 x i8]
+define i8* @test14(i32 %Idx) {
+ %idx = zext i32 %Idx to i64
+ %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i64 %idx
+ ret i8* %tmp
+; CHECK-LABEL: @test14(
+; CHECK: getelementptr [3 x i8], [3 x i8]* @G, i64 0, i64 %idx
+}
+
+
+; Test folding of constantexpr geps into normal geps.
+@Array = external global [40 x i32]
+define i32 *@test15(i64 %X) {
+ %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i64 0, i64 0), i64 %X
+ ret i32* %A
+; CHECK-LABEL: @test15(
+; CHECK: getelementptr [40 x i32], [40 x i32]* @Array, i64 0, i64 %X
+}
+
+
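+; A 32-bit index is sign-extended to the 64-bit pointer index type.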
+define i32* @test16(i32* %X, i32 %Idx) {
+ %R = getelementptr i32, i32* %X, i32 %Idx
+ ret i32* %R
+; CHECK-LABEL: @test16(
+; CHECK: sext i32 %Idx to i64
+}
+
+
+define i1 @test17(i16* %P, i32 %I, i32 %J) {
+ %X = getelementptr inbounds i16, i16* %P, i32 %I
+ %Y = getelementptr inbounds i16, i16* %P, i32 %J
+ %C = icmp ult i16* %X, %Y
+ ret i1 %C
+; CHECK-LABEL: @test17(
+; CHECK: %C = icmp slt i32 %I, %J
+}
+
+define i1 @test18(i16* %P, i32 %I) {
+ %X = getelementptr inbounds i16, i16* %P, i32 %I
+ %C = icmp ult i16* %X, %P
+ ret i1 %C
+; CHECK-LABEL: @test18(
+; CHECK: %C = icmp slt i32 %I, 0
+}
+
+; Larger than the pointer size for a non-zero address space
+define i1 @test18_as1(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+ %X = getelementptr inbounds i16, i16 addrspace(1)* %P, i32 %I
+ %C = icmp ult i16 addrspace(1)* %X, %P
+ ret i1 %C
+}
+
+; Smaller than the pointer size for a non-zero address space
+define i1 @test18_as1_i32(i16 addrspace(1)* %P, i32 %I) {
+; CHECK-LABEL: @test18_as1_i32(
+; CHECK-NEXT: %1 = trunc i32 %I to i16
+; CHECK-NEXT: %C = icmp slt i16 %1, 0
+; CHECK-NEXT: ret i1 %C
+ %X = getelementptr inbounds i16, i16 addrspace(1)* %P, i32 %I
+ %C = icmp ult i16 addrspace(1)* %X, %P
+ ret i1 %C
+}
+
+; Smaller than pointer size
+define i1 @test18_i16(i16* %P, i16 %I) {
+; CHECK-LABEL: @test18_i16(
+; CHECK: %C = icmp slt i16 %I, 0
+ %X = getelementptr inbounds i16, i16* %P, i16 %I
+ %C = icmp ult i16* %X, %P
+ ret i1 %C
+}
+
+; Same as pointer size
+define i1 @test18_i64(i16* %P, i64 %I) {
+; CHECK-LABEL: @test18_i64(
+; CHECK: %C = icmp slt i64 %I, 0
+ %X = getelementptr inbounds i16, i16* %P, i64 %I
+ %C = icmp ult i16* %X, %P
+ ret i1 %C
+}
+
+; Larger than the pointer size
+define i1 @test18_i128(i16* %P, i128 %I) {
+; CHECK-LABEL: @test18_i128(
+; CHECK: %C = icmp slt i64 %1, 0
+ %X = getelementptr inbounds i16, i16* %P, i128 %I
+ %C = icmp ult i16* %X, %P
+ ret i1 %C
+}
+
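+; Comparing two inbounds GEPs off the same base pointer compares the indices.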
+define i32 @test19(i32* %P, i32 %A, i32 %B) {
+ %tmp.4 = getelementptr inbounds i32, i32* %P, i32 %A
+ %tmp.9 = getelementptr inbounds i32, i32* %P, i32 %B
+ %tmp.10 = icmp eq i32* %tmp.4, %tmp.9
+ %tmp.11 = zext i1 %tmp.10 to i32
+ ret i32 %tmp.11
+; CHECK-LABEL: @test19(
+; CHECK: icmp eq i32 %A, %B
+}
+
+define i32 @test20(i32* %P, i32 %A, i32 %B) {
+ %tmp.4 = getelementptr inbounds i32, i32* %P, i32 %A
+ %tmp.6 = icmp eq i32* %tmp.4, %P
+ %tmp.7 = zext i1 %tmp.6 to i32
+ ret i32 %tmp.7
+; CHECK-LABEL: @test20(
+; CHECK: icmp eq i32 %A, 0
+}
+
+define i32 @test20_as1(i32 addrspace(1)* %P, i32 %A, i32 %B) {
+ %tmp.4 = getelementptr inbounds i32, i32 addrspace(1)* %P, i32 %A
+ %tmp.6 = icmp eq i32 addrspace(1)* %tmp.4, %P
+ %tmp.7 = zext i1 %tmp.6 to i32
+ ret i32 %tmp.7
+; CHECK-LABEL: @test20_as1(
+; CHECK: icmp eq i16 %1, 0
+}
+
+
+define i32 @test21() {
+ %pbob1 = alloca %intstruct
+ %pbob2 = getelementptr %intstruct, %intstruct* %pbob1
+ %pbobel = getelementptr %intstruct, %intstruct* %pbob2, i64 0, i32 0
+ %rval = load i32, i32* %pbobel
+ ret i32 %rval
+; CHECK-LABEL: @test21(
+; CHECK: getelementptr inbounds %intstruct, %intstruct* %pbob1, i64 0, i32 0
+}
+
+
+@A = global i32 1 ; <i32*> [#uses=1]
+@B = global i32 2 ; <i32*> [#uses=1]
+
+define i1 @test22() {
+ %C = icmp ult i32* getelementptr (i32, i32* @A, i64 1),
+ getelementptr (i32, i32* @B, i64 2)
+ ret i1 %C
+; CHECK-LABEL: @test22(
+; CHECK: icmp ult (i32* getelementptr inbounds (i32, i32* @A, i64 1), i32* getelementptr (i32, i32* @B, i64 2))
+}
+
+
+%X = type { [10 x i32], float }
+
+define i1 @test23() {
+ %A = getelementptr %X, %X* null, i64 0, i32 0, i64 0 ; <i32*> [#uses=1]
+ %B = icmp ne i32* %A, null ; <i1> [#uses=1]
+ ret i1 %B
+; CHECK-LABEL: @test23(
+; CHECK: ret i1 false
+}
+
+define void @test25() {
+entry:
+ %tmp = getelementptr { i64, i64, i64, i64 }, { i64, i64, i64, i64 }* null, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp.upgrd.1 = load i64, i64* %tmp ; <i64> [#uses=1]
+ %tmp8.ui = load i64, i64* null ; <i64> [#uses=1]
+ %tmp8 = bitcast i64 %tmp8.ui to i64 ; <i64> [#uses=1]
+ %tmp9 = and i64 %tmp8, %tmp.upgrd.1 ; <i64> [#uses=1]
+ %sext = trunc i64 %tmp9 to i32 ; <i32> [#uses=1]
+ %tmp27.i = sext i32 %sext to i64 ; <i64> [#uses=1]
+ tail call void @foo25( i32 0, i64 %tmp27.i )
+ unreachable
+; CHECK-LABEL: @test25(
+}
+
+declare void @foo25(i32, i64)
+
+
+; PR1637
+define i1 @test26(i8* %arr) {
+ %X = getelementptr i8, i8* %arr, i32 1
+ %Y = getelementptr i8, i8* %arr, i32 1
+ %test = icmp uge i8* %X, %Y
+ ret i1 %test
+; CHECK-LABEL: @test26(
+; CHECK: ret i1 true
+}
+
+ %struct.__large_struct = type { [100 x i64] }
+ %struct.compat_siginfo = type { i32, i32, i32, { [29 x i32] } }
+ %struct.siginfo_t = type { i32, i32, i32, { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] } }
+ %struct.sigval_t = type { i8* }
+
+define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
+entry:
+ %from_addr = alloca %struct.siginfo_t*
+ %tmp344 = load %struct.siginfo_t*, %struct.siginfo_t** %from_addr, align 8
+ %tmp345 = getelementptr %struct.siginfo_t, %struct.siginfo_t* %tmp344, i32 0, i32 3
+ %tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }, { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
+ %tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
+ %tmp348 = getelementptr { i32, i32, %struct.sigval_t }, { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
+ %tmp349 = getelementptr %struct.sigval_t, %struct.sigval_t* %tmp348, i32 0, i32 0
+ %tmp349350 = bitcast i8** %tmp349 to i32*
+ %tmp351 = load i32, i32* %tmp349350, align 8
+ %tmp360 = call i32 asm sideeffect "...",
+ "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
+ %struct.__large_struct* null, i32 -14, i32 0 )
+ unreachable
+; CHECK-LABEL: @test27(
+}
+
+; PR1978
+ %struct.x = type <{ i8 }>
+@.str = internal constant [6 x i8] c"Main!\00"
+@.str1 = internal constant [12 x i8] c"destroy %p\0A\00"
+
+define i32 @test28() nounwind {
+entry:
+ %orientations = alloca [1 x [1 x %struct.x]]
+ %tmp3 = call i32 @puts( i8* getelementptr ([6 x i8], [6 x i8]* @.str, i32 0, i32 0) ) nounwind
+ %tmp45 = getelementptr inbounds [1 x [1 x %struct.x]], [1 x [1 x %struct.x]]* %orientations, i32 1, i32 0, i32 0
+ %orientations62 = getelementptr [1 x [1 x %struct.x]], [1 x [1 x %struct.x]]* %orientations, i32 0, i32 0, i32 0
+ br label %bb10
+
+bb10:
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb10 ]
+ %tmp.0.reg2mem.0.rec = mul i32 %indvar, -1
+ %tmp12.rec = add i32 %tmp.0.reg2mem.0.rec, -1
+ %tmp12 = getelementptr inbounds %struct.x, %struct.x* %tmp45, i32 %tmp12.rec
+ %tmp16 = call i32 (i8*, ...) @printf( i8* getelementptr ([12 x i8], [12 x i8]* @.str1, i32 0, i32 0), %struct.x* %tmp12 ) nounwind
+ %tmp84 = icmp eq %struct.x* %tmp12, %orientations62
+ %indvar.next = add i32 %indvar, 1
+ br i1 %tmp84, label %bb17, label %bb10
+
+bb17:
+ ret i32 0
+; CHECK-LABEL: @test28(
+; CHECK: icmp eq i32 %indvar, 0
+}
+
+declare i32 @puts(i8*)
+
+declare i32 @printf(i8*, ...)
+
+
+
+
+; rdar://6762290
+ %T = type <{ i64, i64, i64 }>
+define i32 @test29(i8* %start, i32 %X) nounwind {
+entry:
+ %tmp3 = load i64, i64* null
+ %add.ptr = getelementptr i8, i8* %start, i64 %tmp3
+ %tmp158 = load i32, i32* null
+ %add.ptr159 = getelementptr %T, %T* null, i32 %tmp158
+ %add.ptr209 = getelementptr i8, i8* %start, i64 0
+ %add.ptr212 = getelementptr i8, i8* %add.ptr209, i32 %X
+ %cmp214 = icmp ugt i8* %add.ptr212, %add.ptr
+ br i1 %cmp214, label %if.then216, label %if.end363
+
+if.then216:
+ ret i32 1
+
+if.end363:
+ ret i32 0
+; CHECK-LABEL: @test29(
+}
+
+
+; PR3694
+define i32 @test30(i32 %m, i32 %n) nounwind {
+entry:
+ %0 = alloca i32, i32 %n, align 4
+ %1 = bitcast i32* %0 to [0 x i32]*
+ call void @test30f(i32* %0) nounwind
+ %2 = getelementptr [0 x i32], [0 x i32]* %1, i32 0, i32 %m
+ %3 = load i32, i32* %2, align 4
+ ret i32 %3
+; CHECK-LABEL: @test30(
+; CHECK: getelementptr i32
+}
+
+declare void @test30f(i32*)
+
+
+
+define i1 @test31(i32* %A) {
+ %B = getelementptr i32, i32* %A, i32 1
+ %C = getelementptr i32, i32* %A, i64 1
+ %V = icmp eq i32* %B, %C
+ ret i1 %V
+; CHECK-LABEL: @test31(
+; CHECK: ret i1 true
+}
+
+
+; PR1345
+define i8* @test32(i8* %v) {
+ %A = alloca [4 x i8*], align 16
+ %B = getelementptr [4 x i8*], [4 x i8*]* %A, i32 0, i32 0
+ store i8* null, i8** %B
+ %C = bitcast [4 x i8*]* %A to { [16 x i8] }*
+ %D = getelementptr { [16 x i8] }, { [16 x i8] }* %C, i32 0, i32 0, i32 8
+ %E = bitcast i8* %D to i8**
+ store i8* %v, i8** %E
+ %F = getelementptr [4 x i8*], [4 x i8*]* %A, i32 0, i32 2
+ %G = load i8*, i8** %F
+ ret i8* %G
+; CHECK-LABEL: @test32(
+; CHECK: %D = getelementptr inbounds [4 x i8*], [4 x i8*]* %A, i64 0, i64 1
+; CHECK: %F = getelementptr inbounds [4 x i8*], [4 x i8*]* %A, i64 0, i64 2
+}
+
+; PR3290
+%struct.Key = type { { i32, i32 } }
+%struct.anon = type <{ i8, [3 x i8], i32 }>
+
+define i32* @test33(%struct.Key* %A) {
+; CHECK-LABEL: @test33(
+; CHECK: getelementptr %struct.Key, %struct.Key* %A, i64 0, i32 0, i32 1
+ %B = bitcast %struct.Key* %A to %struct.anon*
+ %C = getelementptr %struct.anon, %struct.anon* %B, i32 0, i32 2
+ ret i32* %C
+}
+
+define i32 addrspace(1)* @test33_as1(%struct.Key addrspace(1)* %A) {
+; CHECK-LABEL: @test33_as1(
+; CHECK: getelementptr %struct.Key, %struct.Key addrspace(1)* %A, i16 0, i32 0, i32 1
+ %B = bitcast %struct.Key addrspace(1)* %A to %struct.anon addrspace(1)*
+ %C = getelementptr %struct.anon, %struct.anon addrspace(1)* %B, i32 0, i32 2
+ ret i32 addrspace(1)* %C
+}
+
+define i32 addrspace(1)* @test33_array_as1([10 x i32] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_as1(
+; CHECK: getelementptr [10 x i32], [10 x i32] addrspace(1)* %A, i16 0, i16 2
+ %B = bitcast [10 x i32] addrspace(1)* %A to [5 x i32] addrspace(1)*
+ %C = getelementptr [5 x i32], [5 x i32] addrspace(1)* %B, i32 0, i32 2
+ ret i32 addrspace(1)* %C
+}
+
+; Make sure the GEP indices use the right pointer-sized integer
+define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1)* %A) {
+; CHECK-LABEL: @test33_array_struct_as1(
+; CHECK: getelementptr [10 x %struct.Key], [10 x %struct.Key] addrspace(1)* %A, i16 0, i16 1, i32 0, i32 0
+ %B = bitcast [10 x %struct.Key] addrspace(1)* %A to [20 x i32] addrspace(1)*
+ %C = getelementptr [20 x i32], [20 x i32] addrspace(1)* %B, i32 0, i32 2
+ ret i32 addrspace(1)* %C
+}
+
+define i32 addrspace(1)* @test33_addrspacecast(%struct.Key* %A) {
+; CHECK-LABEL: @test33_addrspacecast(
+; CHECK: %C = getelementptr %struct.Key, %struct.Key* %A, i64 0, i32 0, i32 1
+; CHECK-NEXT: addrspacecast i32* %C to i32 addrspace(1)*
+; CHECK-NEXT: ret
+ %B = addrspacecast %struct.Key* %A to %struct.anon addrspace(1)*
+ %C = getelementptr %struct.anon, %struct.anon addrspace(1)* %B, i32 0, i32 2
+ ret i32 addrspace(1)* %C
+}
+
+ %T2 = type { i8*, i8 }
+define i8* @test34(i8* %Val, i64 %V) nounwind {
+entry:
+ %A = alloca %T2, align 8
+ %mrv_gep = bitcast %T2* %A to i64*
+ %B = getelementptr %T2, %T2* %A, i64 0, i32 0
+
+ store i64 %V, i64* %mrv_gep
+ %C = load i8*, i8** %B, align 8
+ ret i8* %C
+; CHECK-LABEL: @test34(
+; CHECK: %[[C:.*]] = inttoptr i64 %V to i8*
+; CHECK: ret i8* %[[C]]
+}
+
+%t0 = type { i8*, [19 x i8] }
+%t1 = type { i8*, [0 x i8] }
+
+@array = external global [11 x i8]
+
+@s = external global %t0
+@"\01LC8" = external constant [17 x i8]
+
+; Instcombine should be able to fold this getelementptr.
+
+define i32 @test35() nounwind {
+ call i32 (i8*, ...) @printf(i8* getelementptr ([17 x i8], [17 x i8]* @"\01LC8", i32 0, i32 0),
+ i8* getelementptr (%t1, %t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
+ ret i32 0
+; CHECK-LABEL: @test35(
+; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0, %t0* @s, i64 0, i32 1, i64 0)) [[$NUW:#[0-9]+]]
+}
+
+; Don't treat signed offsets as unsigned.
+define i8* @test36() nounwind {
+ ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i32 0, i64 -1)
+; CHECK-LABEL: @test36(
+; CHECK: ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i64 0, i64 -1)
+}
+
+; Instcombine shouldn't assume that gep(A,0,1) != gep(A,1,0).
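+; Both geps point one byte past the lone element of @A37, so the compare folds to true.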
+@A37 = external constant [1 x i8]
+define i1 @test37() nounwind {
+; CHECK-LABEL: @test37(
+; CHECK: ret i1 true
+ %t = icmp eq i8* getelementptr ([1 x i8], [1 x i8]* @A37, i64 0, i64 1),
+ getelementptr ([1 x i8], [1 x i8]* @A37, i64 1, i64 0)
+ ret i1 %t
+}
+
+; Test index promotion
+define i32* @test38(i32* %I, i32 %n) {
+ %A = getelementptr i32, i32* %I, i32 %n
+ ret i32* %A
+; CHECK-LABEL: @test38(
+; CHECK: = sext i32 %n to i64
+; CHECK: %A = getelementptr i32, i32* %I, i64 %
+}
+
+; Test that we don't duplicate work when the second gep is a "bitcast".
+%pr10322_t = type { i8* }
+declare void @pr10322_f2(%pr10322_t*)
+declare void @pr10322_f3(i8**)
+define void @pr10322_f1(%pr10322_t* %foo) {
+entry:
+ %arrayidx8 = getelementptr inbounds %pr10322_t, %pr10322_t* %foo, i64 2
+ call void @pr10322_f2(%pr10322_t* %arrayidx8) nounwind
+ %tmp2 = getelementptr inbounds %pr10322_t, %pr10322_t* %arrayidx8, i64 0, i32 0
+ call void @pr10322_f3(i8** %tmp2) nounwind
+ ret void
+
+; CHECK-LABEL: @pr10322_f1(
+; CHECK: %tmp2 = getelementptr inbounds %pr10322_t, %pr10322_t* %arrayidx8, i64 0, i32 0
+}
+
+; Test that we combine the last two geps in this sequence right away; otherwise
+; we would wait for gep1 and gep2 to be combined first and never combine 2 and 3.
+%three_gep_t = type {i32}
+%three_gep_t2 = type {%three_gep_t}
+
+define void @three_gep_f(%three_gep_t2* %x) {
+ %gep1 = getelementptr %three_gep_t2, %three_gep_t2* %x, i64 2
+ call void @three_gep_h(%three_gep_t2* %gep1)
+ %gep2 = getelementptr %three_gep_t2, %three_gep_t2* %gep1, i64 0, i32 0
+ %gep3 = getelementptr %three_gep_t, %three_gep_t* %gep2, i64 0, i32 0
+ call void @three_gep_g(i32* %gep3)
+
+; CHECK-LABEL: @three_gep_f(
+; CHECK: %gep3 = getelementptr %three_gep_t2, %three_gep_t2* %gep1, i64 0, i32 0, i32 0
+ ret void
+}
+
+declare void @three_gep_g(i32*)
+declare void @three_gep_h(%three_gep_t2*)
+
+%struct.ham = type { i32, %struct.zot*, %struct.zot*, %struct.zot* }
+%struct.zot = type { i64, i8 }
+
+define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
+ %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 2
+ %tmp2 = load %struct.zot*, %struct.zot** %tmp, align 8
+ %tmp3 = bitcast %struct.zot* %tmp2 to i8*
+ %tmp4 = getelementptr inbounds i8, i8* %tmp3, i64 -8
+ store i8 %arg1, i8* %tmp4, align 8
+ ret void
+
+; CHECK-LABEL: @test39(
+; CHECK: getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 2
+; CHECK: getelementptr inbounds i8, i8* %{{.+}}, i64 -8
+}
+
+define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
+ %c = getelementptr [1 x i8], [1 x i8]* %a, i32 0, i32 0
+ %d = getelementptr [1 x i8], [1 x i8]* %b, i32 0, i32 0
+ %cmp = icmp ult i8* %c, %d
+ ret i1 %cmp
+
+; CHECK-LABEL: @pr16483(
+; CHECK-NEXT: icmp ult [1 x i8]* %a, %b
+}
+
+define i8 @test_gep_bitcast_as1(i32 addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_as1(
+; CHECK: getelementptr i32, i32 addrspace(1)* %arr, i16 %N
+; CHECK: bitcast
+ %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)*
+ %V = mul i16 %N, 4
+ %t = getelementptr i8, i8 addrspace(1)* %cast, i16 %V
+ %x = load i8, i8 addrspace(1)* %t
+ ret i8 %x
+}
+
+; The element size of the array matches the element size of the pointer
+define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element(
+; CHECK: getelementptr [100 x double], [100 x double]* %arr, i64 0, i64 %V
+; CHECK: bitcast
+ %cast = bitcast [100 x double]* %arr to i64*
+ %V = mul i64 %N, 8
+ %t = getelementptr i64, i64* %cast, i64 %V
+ %x = load i64, i64* %t
+ ret i64 %x
+}
+
+; gep should be done in the original address space.
+define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast(
+; CHECK: getelementptr [100 x double], [100 x double]* %arr, i64 0, i64 %V
+; CHECK-NEXT: bitcast double*
+; CHECK-NEXT: %t = addrspacecast i64*
+; CHECK: load i64, i64 addrspace(3)* %t
+ %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)*
+ %V = mul i64 %N, 8
+ %t = getelementptr i64, i64 addrspace(3)* %cast, i64 %V
+ %x = load i64, i64 addrspace(3)* %t
+ ret i64 %x
+}
+
+; The element size of the array is different from the element size of the pointer
+define i8 @test_gep_bitcast_array_different_size_element([100 x double]* %arr, i64 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element(
+; CHECK: getelementptr [100 x double], [100 x double]* %arr, i64 0, i64 %N
+; CHECK: bitcast
+ %cast = bitcast [100 x double]* %arr to i8*
+ %V = mul i64 %N, 8
+ %t = getelementptr i8, i8* %cast, i64 %V
+ %x = load i8, i8* %t
+ ret i8 %x
+}
+
+define i64 @test_gep_bitcast_array_same_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_as1(
+; CHECK: getelementptr [100 x double], [100 x double] addrspace(1)* %arr, i16 0, i16 %V
+; CHECK: bitcast
+ %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)*
+ %V = mul i16 %N, 8
+ %t = getelementptr i64, i64 addrspace(1)* %cast, i16 %V
+ %x = load i64, i64 addrspace(1)* %t
+ ret i64 %x
+}
+
+define i8 @test_gep_bitcast_array_different_size_element_as1([100 x double] addrspace(1)* %arr, i16 %N) {
+; CHECK-LABEL: @test_gep_bitcast_array_different_size_element_as1(
+; CHECK: getelementptr [100 x double], [100 x double] addrspace(1)* %arr, i16 0, i16 %N
+; CHECK: bitcast
+ %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)*
+ %V = mul i16 %N, 8
+ %t = getelementptr i8, i8 addrspace(1)* %cast, i16 %V
+ %x = load i8, i8 addrspace(1)* %t
+ ret i8 %x
+}
+
+define i64 @test40() {
+ %array = alloca [3 x i32], align 4
+ %gep = getelementptr inbounds [3 x i32], [3 x i32]* %array, i64 0, i64 2
+ %gepi8 = bitcast i32* %gep to i8*
+ %p = ptrtoint [3 x i32]* %array to i64
+ %np = sub i64 0, %p
+ %gep2 = getelementptr i8, i8* %gepi8, i64 %np
+ %ret = ptrtoint i8* %gep2 to i64
+ ret i64 %ret
+
+; CHECK-LABEL: @test40
+; CHECK-NEXT: ret i64 8
+}
+
+define i16 @test41([3 x i32] addrspace(1)* %array) {
+ %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(1)* %array, i16 0, i16 2
+ %gepi8 = bitcast i32 addrspace(1)* %gep to i8 addrspace(1)*
+ %p = ptrtoint [3 x i32] addrspace(1)* %array to i16
+ %np = sub i16 0, %p
+ %gep2 = getelementptr i8, i8 addrspace(1)* %gepi8, i16 %np
+ %ret = ptrtoint i8 addrspace(1)* %gep2 to i16
+ ret i16 %ret
+
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i16 8
+}
+
+define i8* @test42(i8* %c1, i8* %c2) {
+ %ptrtoint = ptrtoint i8* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %gep = getelementptr inbounds i8, i8* %c2, i64 %sub
+ ret i8* %gep
+
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint i8* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint i8* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i8*
+; CHECK-NEXT: ret i8* [[INTTOPTR]]
+}
+
+define i16* @test43(i16* %c1, i16* %c2) {
+ %ptrtoint = ptrtoint i16* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %shr = ashr i64 %sub, 1
+ %gep = getelementptr inbounds i16, i16* %c2, i64 %shr
+ ret i16* %gep
+
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint i16* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint i16* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i16*
+; CHECK-NEXT: ret i16* [[INTTOPTR]]
+}
+
+define %struct.C* @test44(%struct.C* %c1, %struct.C* %c2) {
+ %ptrtoint = ptrtoint %struct.C* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %shr = sdiv i64 %sub, 7
+ %gep = getelementptr inbounds %struct.C, %struct.C* %c2, i64 %shr
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[PTRTOINT1:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT: [[PTRTOINT2:%.*]] = ptrtoint %struct.C* %c2 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT: [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to %struct.C*
+; CHECK-NEXT: ret %struct.C* [[INTTOPTR]]
+}
+
+define %struct.C* @test45(%struct.C* %c1, %struct.C** %c2) {
+ %ptrtoint1 = ptrtoint %struct.C* %c1 to i64
+ %ptrtoint2 = ptrtoint %struct.C** %c2 to i64
+ %sub = sub i64 %ptrtoint2, %ptrtoint1 ; C2 - C1
+ %shr = sdiv i64 %sub, 7
+ %gep = getelementptr inbounds %struct.C, %struct.C* %c1, i64 %shr ; C1 + (C2 - C1)
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %struct.C** %c2 to %struct.C*
+; CHECK-NEXT: ret %struct.C* [[BITCAST]]
+}
+
+define %struct.C* @test46(%struct.C* %c1, %struct.C* %c2, i64 %N) {
+ %ptrtoint = ptrtoint %struct.C* %c1 to i64
+ %sub = sub i64 0, %ptrtoint
+ %sdiv = sdiv i64 %sub, %N
+ %gep = getelementptr inbounds %struct.C, %struct.C* %c2, i64 %sdiv
+ ret %struct.C* %gep
+
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[PTRTOINT:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 0, [[PTRTOINT]]
+; CHECK-NEXT: [[SDIV:%.*]] = sdiv i64 [[SUB]], %N
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds %struct.C, %struct.C* %c2, i64 %sdiv
+; CHECK-NEXT: ret %struct.C* [[GEP]]
+}
+
+define i32* @test47(i32* %I, i64 %C, i64 %D) {
+ %sub = sub i64 %D, %C
+ %A = getelementptr i32, i32* %I, i64 %C
+ %B = getelementptr i32, i32* %A, i64 %sub
+ ret i32* %B
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 %D
+}
+
+define i32* @test48(i32* %I, i64 %C, i64 %D) {
+ %sub = sub i64 %D, %C
+ %A = getelementptr i32, i32* %I, i64 %sub
+ %B = getelementptr i32, i32* %A, i64 %C
+ ret i32* %B
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 %D
+}
+
+define i32* @test49(i32* %I, i64 %C) {
+ %notC = xor i64 -1, %C
+ %A = getelementptr i32, i32* %I, i64 %C
+ %B = getelementptr i32, i32* %A, i64 %notC
+ ret i32* %B
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: %B = getelementptr i32, i32* %I, i64 -1
+}
+
+define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
+; CHECK-LABEL: @ascast_0_gep(
+; CHECK-NOT: getelementptr
+; CHECK: ret
+ %gep = getelementptr i32, i32* %p, i32 0
+ %x = addrspacecast i32* %gep to i32 addrspace(1)*
+ ret i32 addrspace(1)* %x
+}
+
+; Do not merge the GEP and the addrspacecast, because it would undo the
+; addrspacecast canonicalization.
+define i32 addrspace(1)* @ascast_0_0_gep([128 x i32]* %p) nounwind {
+; CHECK-LABEL: @ascast_0_0_gep(
+; CHECK-NEXT: getelementptr [128 x i32]
+; CHECK-NEXT: addrspacecast i32*
+; CHECK-NEXT: ret i32 addrspace(1)*
+ %gep = getelementptr [128 x i32], [128 x i32]* %p, i32 0, i32 0
+ %x = addrspacecast i32* %gep to i32 addrspace(1)*
+ ret i32 addrspace(1)* %x
+}
+
+define <2 x i32*> @PR32414(i32** %ptr) {
+; CHECK-LABEL: @PR32414(
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32** %ptr to i32*
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: ret <2 x i32*> [[TMP1]]
+;
+ %tmp0 = bitcast i32** %ptr to i32*
+ %tmp1 = getelementptr inbounds i32, i32* %tmp0, <2 x i64> <i64 0, i64 1>
+ ret <2 x i32*> %tmp1
+}
+
+; CHECK: attributes [[$NUW]] = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/hoist_instr.ll b/llvm/test/Transforms/InstCombine/hoist_instr.ll
new file mode 100644
index 00000000000..fa451bcc727
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/hoist_instr.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;; This tests that the div is hoisted into the then block.
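+;; The %entry arm of the phi folds to sdiv 15, 42 = 0, so only the %then path still needs a division.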
+define i32 @foo(i1 %C, i32 %A, i32 %B) {
+entry:
+ br i1 %C, label %then, label %endif
+
+then: ; preds = %entry
+; CHECK: then:
+; CHECK-NEXT: sdiv i32
+ br label %endif
+
+endif: ; preds = %then, %entry
+ %X = phi i32 [ %A, %then ], [ 15, %entry ] ; <i32> [#uses=1]
+ %Y = sdiv i32 %X, 42 ; <i32> [#uses=1]
+ ret i32 %Y
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
new file mode 100644
index 00000000000..86d4d7c5735
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -0,0 +1,465 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1949
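+; With a wrapping add, %a + 4 is ult 4 exactly for the four largest values, i.e. when %a is ugt -5.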
+
+define i1 @test1(i32 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A:%.*]], -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add i32 %a, 4
+ %c = icmp ult i32 %b, 4
+ ret i1 %c
+}
+
+define <2 x i1> @test1vec(<2 x i32> %a) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 -5, i32 -5>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add <2 x i32> %a, <i32 4, i32 4>
+ %c = icmp ult <2 x i32> %b, <i32 4, i32 4>
+ ret <2 x i1> %c
+}
+
+define i1 @test2(i32 %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = sub i32 %a, 4
+ %c = icmp ugt i32 %b, -5
+ ret i1 %c
+}
+
+define <2 x i1> @test2vec(<2 x i32> %a) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = sub <2 x i32> %a, <i32 4, i32 4>
+ %c = icmp ugt <2 x i32> %b, <i32 -5, i32 -5>
+ ret <2 x i1> %c
+}
+
+define i1 @test3(i32 %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A:%.*]], 2147483643
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add i32 %a, 4
+ %c = icmp slt i32 %b, 2147483652
+ ret i1 %c
+}
+
+define <2 x i1> @test3vec(<2 x i32> %a) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 2147483643, i32 2147483643>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add <2 x i32> %a, <i32 4, i32 4>
+ %c = icmp slt <2 x i32> %b, <i32 2147483652, i32 2147483652>
+ ret <2 x i1> %c
+}
+
+define i1 @test4(i32 %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A:%.*]], -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add i32 %a, 2147483652
+ %c = icmp sge i32 %b, 4
+ ret i1 %c
+}
+
+define { i32, i1 } @test4multiuse(i32 %a) {
+; CHECK-LABEL: @test4multiuse(
+; CHECK-NEXT: [[B:%.*]] = add i32 [[A:%.*]], -2147483644
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[B]], -4
+; CHECK-NEXT: [[TMP:%.*]] = insertvalue { i32, i1 } undef, i32 [[B]], 0
+; CHECK-NEXT: [[RES:%.*]] = insertvalue { i32, i1 } [[TMP]], i1 [[C]], 1
+; CHECK-NEXT: ret { i32, i1 } [[RES]]
+;
+
+ %b = add i32 %a, -2147483644
+ %c = icmp slt i32 %b, -4
+
+ %tmp = insertvalue { i32, i1 } undef, i32 %b, 0
+ %res = insertvalue { i32, i1 } %tmp, i1 %c, 1
+
+ ret { i32, i1 } %res
+}
+
+define <2 x i1> @test4vec(<2 x i32> %a) {
+; CHECK-LABEL: @test4vec(
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], <i32 -4, i32 -4>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add <2 x i32> %a, <i32 2147483652, i32 2147483652>
+ %c = icmp sge <2 x i32> %b, <i32 4, i32 4>
+ ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; This becomes equality because it's at the limit.
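+; C - C2 = -27 - 100 = -127, and the only i8 value slt -127 is -128.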
+
+define i1 @nsw_slt1(i8 %a) {
+; CHECK-LABEL: @nsw_slt1(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A:%.*]], -128
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, 100
+ %c = icmp slt i8 %b, -27
+ ret i1 %c
+}
+
+define <2 x i1> @nsw_slt1_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @nsw_slt1_splat_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add nsw <2 x i8> %a, <i8 100, i8 100>
+ %c = icmp slt <2 x i8> %b, <i8 -27, i8 -27>
+ ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; This becomes equality because it's at the limit.
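+; C - C2 = 27 - (-100) = 127, and every i8 value except 127 is slt 127, so this becomes icmp ne 127.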
+
+define i1 @nsw_slt2(i8 %a) {
+; CHECK-LABEL: @nsw_slt2(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A:%.*]], 127
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, -100
+ %c = icmp slt i8 %b, 27
+ ret i1 %c
+}
+
+define <2 x i1> @nsw_slt2_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @nsw_slt2_splat_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add nsw <2 x i8> %a, <i8 -100, i8 -100>
+ %c = icmp slt <2 x i8> %b, <i8 27, i8 27>
+ ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Less than the limit, so the predicate doesn't change.
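+; C - C2 = -26 - 100 = -126, which is not at the i8 limit, so slt is preserved.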
+
+define i1 @nsw_slt3(i8 %a) {
+; CHECK-LABEL: @nsw_slt3(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[A:%.*]], -126
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, 100
+ %c = icmp slt i8 %b, -26
+ ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Less than the limit, so the predicate doesn't change.
+
+define i1 @nsw_slt4(i8 %a) {
+; CHECK-LABEL: @nsw_slt4(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[A:%.*]], 126
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, -100
+ %c = icmp slt i8 %b, 26
+ ret i1 %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Try sgt to make sure that works too.
+
+define i1 @nsw_sgt1(i8 %a) {
+; CHECK-LABEL: @nsw_sgt1(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A:%.*]], 127
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, -100
+ %c = icmp sgt i8 %b, 26
+ ret i1 %c
+}
+
+define <2 x i1> @nsw_sgt1_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @nsw_sgt1_splat_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add nsw <2 x i8> %a, <i8 -100, i8 -100>
+ %c = icmp sgt <2 x i8> %b, <i8 26, i8 26>
+ ret <2 x i1> %c
+}
+
+define i1 @nsw_sgt2(i8 %a) {
+; CHECK-LABEL: @nsw_sgt2(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[A:%.*]], -126
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %b = add nsw i8 %a, 100
+ %c = icmp sgt i8 %b, -26
+ ret i1 %c
+}
+
+define <2 x i1> @nsw_sgt2_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @nsw_sgt2_splat_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -126, i8 -126>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = add nsw <2 x i8> %a, <i8 100, i8 100>
+ %c = icmp sgt <2 x i8> %b, <i8 -26, i8 -26>
+ ret <2 x i1> %c
+}
+
+; icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2), when C - C2 does not overflow.
+; Comparison with 0 doesn't need special-casing.
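+; 0 - 1 = -1 does not overflow, so this is simply icmp slt %a, -1.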
+
+define i1 @slt_zero_add_nsw(i32 %a) {
+; CHECK-LABEL: @slt_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp slt i32 %add, 0
+ ret i1 %cmp
+}
+
+; The same fold should work with vectors.
+
+define <2 x i1> @slt_zero_add_nsw_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @slt_zero_add_nsw_splat_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %add = add nsw <2 x i8> %a, <i8 1, i8 1>
+ %cmp = icmp slt <2 x i8> %add, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction does not overflow, but this is false.
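+; -28 - 100 = -128 still fits in i8, but no i8 value is slt -128, so the compare is false.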
+
+define i1 @nsw_slt3_ov_no(i8 %a) {
+; CHECK-LABEL: @nsw_slt3_ov_no(
+; CHECK-NEXT: ret i1 false
+;
+ %b = add nsw i8 %a, 100
+ %c = icmp slt i8 %b, -28
+ ret i1 %c
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction overflows. This is false.
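+; With nsw, %b = %a + 100 is at least -128 + 100 = -28, so %b slt -29 is always false.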
+
+define i1 @nsw_slt4_ov(i8 %a) {
+; CHECK-LABEL: @nsw_slt4_ov(
+; CHECK-NEXT: ret i1 false
+;
+ %b = add nsw i8 %a, 100
+ %c = icmp slt i8 %b, -29
+ ret i1 %c
+}
+
+; Test the edges - instcombine should not interfere with simplification to constants.
+; Constant subtraction overflows. This is true.
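+; With nsw, %b = %a - 100 is at most 127 - 100 = 27, so %b slt 28 is always true.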
+
+define i1 @nsw_slt5_ov(i8 %a) {
+; CHECK-LABEL: @nsw_slt5_ov(
+; CHECK-NEXT: ret i1 true
+;
+ %b = add nsw i8 %a, -100
+ %c = icmp slt i8 %b, 28
+ ret i1 %c
+}
+
+; InstCombine should not thwart this opportunity to simplify completely.
+
+define i1 @slt_zero_add_nsw_signbit(i8 %x) {
+; CHECK-LABEL: @slt_zero_add_nsw_signbit(
+; CHECK-NEXT: ret i1 true
+;
+ %y = add nsw i8 %x, -128
+ %z = icmp slt i8 %y, 0
+ ret i1 %z
+}
+
+; InstCombine should not thwart this opportunity to simplify completely.
+
+define i1 @slt_zero_add_nuw_signbit(i8 %x) {
+; CHECK-LABEL: @slt_zero_add_nuw_signbit(
+; CHECK-NEXT: ret i1 true
+;
+ %y = add nuw i8 %x, 128
+ %z = icmp slt i8 %y, 0
+ ret i1 %z
+}
+
+define i1 @reduce_add_ult(i32 %in) {
+; CHECK-LABEL: @reduce_add_ult(
+; CHECK-NEXT: [[A18:%.*]] = icmp ult i32 [[IN:%.*]], 9
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add nuw i32 %in, 3
+ %a18 = icmp ult i32 %a6, 12
+ ret i1 %a18
+}
+
+define i1 @reduce_add_ugt(i32 %in) {
+; CHECK-LABEL: @reduce_add_ugt(
+; CHECK-NEXT: [[A18:%.*]] = icmp ugt i32 [[IN:%.*]], 9
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add nuw i32 %in, 3
+ %a18 = icmp ugt i32 %a6, 12
+ ret i1 %a18
+}
+
+define i1 @reduce_add_ule(i32 %in) {
+; CHECK-LABEL: @reduce_add_ule(
+; CHECK-NEXT: [[A18:%.*]] = icmp ult i32 [[IN:%.*]], 10
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add nuw i32 %in, 3
+ %a18 = icmp ule i32 %a6, 12
+ ret i1 %a18
+}
+
+define i1 @reduce_add_uge(i32 %in) {
+; CHECK-LABEL: @reduce_add_uge(
+; CHECK-NEXT: [[A18:%.*]] = icmp ugt i32 [[IN:%.*]], 8
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add nuw i32 %in, 3
+ %a18 = icmp uge i32 %a6, 12
+ ret i1 %a18
+}
+
+define i1 @ult_add_ssubov(i32 %in) {
+; CHECK-LABEL: @ult_add_ssubov(
+; CHECK-NEXT: ret i1 false
+;
+ %a6 = add nuw i32 %in, 71
+ %a18 = icmp ult i32 %a6, 3
+ ret i1 %a18
+}
+
+define i1 @ult_add_nonuw(i8 %in) {
+; CHECK-LABEL: @ult_add_nonuw(
+; CHECK-NEXT: [[A6:%.*]] = add i8 [[IN:%.*]], 71
+; CHECK-NEXT: [[A18:%.*]] = icmp ult i8 [[A6]], 12
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add i8 %in, 71
+ %a18 = icmp ult i8 %a6, 12
+ ret i1 %a18
+}
+
+define i1 @uge_add_nonuw(i32 %in) {
+; CHECK-LABEL: @uge_add_nonuw(
+; CHECK-NEXT: [[A6:%.*]] = add i32 [[IN:%.*]], 3
+; CHECK-NEXT: [[A18:%.*]] = icmp ugt i32 [[A6]], 11
+; CHECK-NEXT: ret i1 [[A18]]
+;
+ %a6 = add i32 %in, 3
+ %a18 = icmp uge i32 %a6, 12
+ ret i1 %a18
+}
+
+; Test unsigned add overflow patterns. The div ops are only here to
+; thwart complexity-based canonicalization of the operand order.
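+; With a wrapping add, %x ugt (%x + %y) holds exactly when the add overflows, i.e. when
+; %y ugt ~%x, which is the form the folds below produce.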
+
+define i1 @op_ugt_sum_commute1(i8 %p1, i8 %p2) {
+; CHECK-LABEL: @op_ugt_sum_commute1(
+; CHECK-NEXT: [[X:%.*]] = sdiv i8 42, [[P1:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv i8 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[Y]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %x = sdiv i8 42, %p1
+ %y = sdiv i8 42, %p2
+ %a = add i8 %x, %y
+ %c = icmp ugt i8 %x, %a
+ ret i1 %c
+}
+
+define <2 x i1> @op_ugt_sum_vec_commute2(<2 x i8> %p1, <2 x i8> %p2) {
+; CHECK-LABEL: @op_ugt_sum_vec_commute2(
+; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P1:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %x = sdiv <2 x i8> <i8 42, i8 -42>, %p1
+ %y = sdiv <2 x i8> <i8 42, i8 -42>, %p2
+ %a = add <2 x i8> %y, %x
+ %c = icmp ugt <2 x i8> %x, %a
+ ret <2 x i1> %c
+}
+
+define i1 @sum_ugt_op_uses(i8 %p1, i8 %p2, i8* %p3) {
+; CHECK-LABEL: @sum_ugt_op_uses(
+; CHECK-NEXT: [[X:%.*]] = sdiv i8 42, [[P1:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv i8 42, [[P2:%.*]]
+; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X]], [[Y]]
+; CHECK-NEXT: store i8 [[A]], i8* [[P3:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], [[A]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %x = sdiv i8 42, %p1
+ %y = sdiv i8 42, %p2
+ %a = add i8 %x, %y
+ store i8 %a, i8* %p3
+ %c = icmp ugt i8 %x, %a
+ ret i1 %c
+}
+
+define <2 x i1> @sum_ult_op_vec_commute1(<2 x i8> %p1, <2 x i8> %p2) {
+; CHECK-LABEL: @sum_ult_op_vec_commute1(
+; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P1:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i8> <i8 -42, i8 42>, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %x = sdiv <2 x i8> <i8 42, i8 -42>, %p1
+ %y = sdiv <2 x i8> <i8 -42, i8 42>, %p2
+ %a = add <2 x i8> %x, %y
+ %c = icmp ult <2 x i8> %a, %x
+ ret <2 x i1> %c
+}
+
+define i1 @sum_ult_op_commute2(i8 %p1, i8 %p2) {
+; CHECK-LABEL: @sum_ult_op_commute2(
+; CHECK-NEXT: [[X:%.*]] = sdiv i8 42, [[P1:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv i8 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[Y]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %x = sdiv i8 42, %p1
+ %y = sdiv i8 42, %p2
+ %a = add i8 %y, %x
+ %c = icmp ult i8 %a, %x
+ ret i1 %c
+}
+
+define i1 @sum_ult_op_uses(i8 %x, i8 %y, i8* %p) {
+; CHECK-LABEL: @sum_ult_op_uses(
+; CHECK-NEXT: [[A:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: store i8 [[A]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[A]], [[X]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = add i8 %y, %x
+ store i8 %a, i8* %p
+ %c = icmp ult i8 %a, %x
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-bc-vec.ll b/llvm/test/Transforms/InstCombine/icmp-bc-vec.ll
new file mode 100644
index 00000000000..26252d2b402
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-bc-vec.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests to verify proper functioning of the icmp folding implemented in
+; InstCombiner::foldICmpBitCastConstant
+; Specifically, folding:
+; icmp <pred> iN X, C
+; where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC) to iN
+; and C is a splat of a K-bit pattern
+; and SC is a constant vector = <C', C', C', ..., C'>
+; Into:
+; %E = extractelement <M x iK> %vec, i32 C'
+; icmp <pred> iK %E, trunc(C)
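+; For example, 1212696648 = 0x48484848 is a splat of the i8 value 72, so the i32 compares
+; below become i8 compares against 72.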
+
+define i1 @test_i1_0(i1 %val) {
+; CHECK-LABEL: @test_i1_0(
+; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i1> undef, i1 %val, i32 0
+ %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+ %cast = bitcast <4 x i1> %vec to i4
+ %cond = icmp eq i4 %cast, 0
+ ret i1 %cond
+}
+
+define i1 @test_i1_0_2(i1 %val) {
+; CHECK-LABEL: @test_i1_0_2(
+; CHECK-NEXT: [[COND:%.*]] = xor i1 [[VAL:%.*]], true
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i1> undef, i1 %val, i32 2
+ %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ %cast = bitcast <4 x i1> %vec to i4
+ %cond = icmp eq i4 %cast, 0
+ ret i1 %cond
+}
+
+define i1 @test_i1_m1(i1 %val) {
+; CHECK-LABEL: @test_i1_m1(
+; CHECK-NEXT: ret i1 [[VAL:%.*]]
+;
+ %insvec = insertelement <4 x i1> undef, i1 %val, i32 0
+ %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+ %cast = bitcast <4 x i1> %vec to i4
+ %cond = icmp eq i4 %cast, -1
+ ret i1 %cond
+}
+
+define i1 @test_i8_pattern(i8 %val) {
+; CHECK-LABEL: @test_i8_pattern(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+ %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+ %cast = bitcast <4 x i8> %vec to i32
+ %cond = icmp eq i32 %cast, 1212696648
+ ret i1 %cond
+}
+
+define i1 @test_i8_pattern_2(i8 %val) {
+; CHECK-LABEL: @test_i8_pattern_2(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL:%.*]], 72
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i8> undef, i8 %val, i32 2
+ %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ %cast = bitcast <4 x i8> %vec to i32
+ %cond = icmp eq i32 %cast, 1212696648
+ ret i1 %cond
+}
+
+; Make sure we don't try to fold if the shufflemask has differing element values
+define i1 @test_i8_pattern_3(<4 x i8> %invec) {
+; CHECK-LABEL: @test_i8_pattern_3(
+; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %vec = shufflevector <4 x i8> %invec, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %cast = bitcast <4 x i8> %vec to i32
+ %cond = icmp eq i32 %cast, 1212696648
+ ret i1 %cond
+}
+
+; Make sure we don't try to fold if the compared-to constant isn't a splatted value
+define i1 @test_i8_nopattern(i8 %val) {
+; CHECK-LABEL: @test_i8_nopattern(
+; CHECK-NEXT: [[INSVEC:%.*]] = insertelement <4 x i8> undef, i8 [[VAL:%.*]], i32 0
+; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+ %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+ %cast = bitcast <4 x i8> %vec to i32
+ %cond = icmp eq i32 %cast, 1212696647
+ ret i1 %cond
+}
+
+; Verify that we fold more than just the eq predicate
+define i1 @test_i8_ult_pattern(i8 %val) {
+; CHECK-LABEL: @test_i8_ult_pattern(
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[VAL:%.*]], 72
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %insvec = insertelement <4 x i8> undef, i8 %val, i32 0
+ %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+ %cast = bitcast <4 x i8> %vec to i32
+ %cond = icmp ult i32 %cast, 1212696648
+ ret i1 %cond
+}
+
+define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) {
+; CHECK-LABEL: @extending_shuffle_with_weird_types(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i9> [[V:%.*]], i32 0
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer
+ %cast = bitcast <3 x i9> %splat to i27
+ %cmp = icmp slt i27 %cast, 262657 ; 0x040201
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
new file mode 100644
index 00000000000..ec7828feacb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:40:64:64:32-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare i32 @test58_d(i64 )
+
+define i1 @test59(i8* %foo) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8* [[FOO:%.*]], i32 8
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[GEP1]] to i32
+; CHECK-NEXT: [[USE:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
+; CHECK-NEXT: ret i1 true
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32, i32* %bit, i64 2
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 10
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ %use = ptrtoint i8* %cast1 to i64
+ %call = call i32 @test58_d(i64 %use)
+ ret i1 %cmp
+}
+
+define i1 @test59_as1(i8 addrspace(1)* %foo) {
+; CHECK-LABEL: @test59_as1(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[FOO:%.*]], i16 8
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP1]] to i16
+; CHECK-NEXT: [[USE:%.*]] = zext i16 [[TMP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
+; CHECK-NEXT: ret i1 true
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 2
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 10
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ %use = ptrtoint i8 addrspace(1)* %cast1 to i64
+ %call = call i32 @test58_d(i64 %use)
+ ret i1 %cmp
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[GEP1_IDX]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i16
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 %j
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+}
+
+; Same as test60, but look through an addrspacecast instead of a
+; bitcast. This uses the same-sized addrspace.
+define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i32
+; CHECK-NEXT: [[I_TR:%.*]] = trunc i64 [[I:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[I_TR]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %bit = addrspacecast i8* %foo to i32 addrspace(3)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(3)* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast_smaller(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %bit = addrspacecast i8* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) {
+; CHECK-LABEL: @test60_addrspacecast_larger(
+; CHECK-NEXT: [[I_TR:%.*]] = trunc i32 [[I:%.*]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[I_TR]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(2)* %bit, i32 %i
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j
+ %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8* [[FOO:%.*]] to i32*
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i32
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[FOO]], i32 [[TMP2]]
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr i32, i32* %bit, i64 %i
+ %gep2 = getelementptr i8, i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+}
+
+define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test61_as1(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8 addrspace(1)* [[FOO:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 [[I:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[FOO]], i16 [[J:%.*]]
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+}
+
+define i1 @test62(i8* %a) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: ret i1 true
+;
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10
+ %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+ ret i1 %cmp
+}
+
+define i1 @test62_as1(i8 addrspace(1)* %a) {
+; CHECK-LABEL: @test62_as1(
+; CHECK-NEXT: ret i1 true
+;
+ %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1
+ %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10
+ %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
+ ret i1 %cmp
+}
+
+
+; The ashr result has a second masked use; both compares should still fold.
+define i1 @icmp_and_ashr_multiuse(i32 %X) {
+; CHECK-LABEL: @icmp_and_ashr_multiuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], 496
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432
+; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]]
+; CHECK-NEXT: ret i1 [[AND3]]
+;
+ %shr = ashr i32 %X, 4
+ %and = and i32 %shr, 15
+ %and2 = and i32 %shr, 31 ; second use of the shift
+ %tobool = icmp ne i32 %and, 14
+ %tobool2 = icmp ne i32 %and2, 27
+ %and3 = and i1 %tobool, %tobool2
+ ret i1 %and3
+}
+
+define i1 @icmp_lshr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_lshr_and_overshift(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %shr = lshr i8 %X, 5
+ %and = and i8 %shr, 15
+ %tobool = icmp ne i8 %and, 0
+ ret i1 %tobool
+}
+
+; We shouldn't simplify this because the and uses bits that are shifted in.
+define i1 @icmp_ashr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_ashr_and_overshift(
+; CHECK-NEXT: [[SHR:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SHR]], 15
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %shr = ashr i8 %X, 5
+ %and = and i8 %shr, 15
+ %tobool = icmp ne i8 %and, 0
+ ret i1 %tobool
+}
+
+; PR16244
+define i1 @test71(i8* %x) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: ret i1 false
+;
+ %a = getelementptr i8, i8* %x, i64 8
+ %b = getelementptr inbounds i8, i8* %x, i64 8
+ %c = icmp ugt i8* %a, %b
+ ret i1 %c
+}
+
+define i1 @test71_as1(i8 addrspace(1)* %x) {
+; CHECK-LABEL: @test71_as1(
+; CHECK-NEXT: ret i1 false
+;
+ %a = getelementptr i8, i8 addrspace(1)* %x, i64 8
+ %b = getelementptr inbounds i8, i8 addrspace(1)* %x, i64 8
+ %c = icmp ugt i8 addrspace(1)* %a, %b
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll
new file mode 100644
index 00000000000..4c0a56825be
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR30281 - https://llvm.org/bugs/show_bug.cgi?id=30281
+
+; All of these tests contain foldable division-by-constant instructions, but we
+; can't assert that those folds have occurred before we process the later icmp.
+
+define i32 @icmp_div(i16 %a, i16 %c) {
+; CHECK-LABEL: @icmp_div(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 %a, 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %then, label %exit
+; CHECK: then:
+; CHECK-NEXT: [[NOT_CMP:%.*]] = icmp eq i16 %c, 0
+; CHECK-NEXT: [[PHITMP1:%.*]] = sext i1 [[NOT_CMP]] to i32
+; CHECK-NEXT: br label %exit
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ -1, %entry ], [ [[PHITMP1]], %then ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ %tobool = icmp eq i16 %a, 0
+ br i1 %tobool, label %then, label %exit
+
+then:
+ %div = sdiv i16 %c, -1
+ %cmp = icmp ne i16 %div, 0
+ br label %exit
+
+exit:
+ %phi = phi i1 [ false, %entry ], [ %cmp, %then ]
+ %zext = zext i1 %phi to i32
+ %add = add nsw i32 %zext, -1
+ ret i32 %add
+}
+
+define i32 @icmp_div2(i16 %a, i16 %c) {
+; CHECK-LABEL: @icmp_div2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 %a, 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %then, label %exit
+; CHECK: then:
+; CHECK-NEXT: br label %exit
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ -1, %entry ], [ 0, %then ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ %tobool = icmp eq i16 %a, 0
+ br i1 %tobool, label %then, label %exit
+
+then:
+ %div = sdiv i16 %c, 0
+ %cmp = icmp ne i16 %div, 0
+ br label %exit
+
+exit:
+ %phi = phi i1 [ false, %entry ], [ %cmp, %then ]
+ %zext = zext i1 %phi to i32
+ %add = add nsw i32 %zext, -1
+ ret i32 %add
+}
+
+define i32 @icmp_div3(i16 %a, i16 %c) {
+; CHECK-LABEL: @icmp_div3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i16 %a, 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %then, label %exit
+; CHECK: then:
+; CHECK-NEXT: [[NOT_CMP:%.*]] = icmp eq i16 %c, 0
+; CHECK-NEXT: [[PHITMP1:%.*]] = sext i1 [[NOT_CMP]] to i32
+; CHECK-NEXT: br label %exit
+; CHECK: exit:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ -1, %entry ], [ [[PHITMP1]], %then ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ %tobool = icmp eq i16 %a, 0
+ br i1 %tobool, label %then, label %exit
+
+then:
+ %div = sdiv i16 %c, 1
+ %cmp = icmp ne i16 %div, 0
+ br label %exit
+
+exit:
+ %phi = phi i1 [ false, %entry ], [ %cmp, %then ]
+ %zext = zext i1 %phi to i32
+ %add = add nsw i32 %zext, -1
+ ret i32 %add
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-dom.ll b/llvm/test/Transforms/InstCombine/icmp-dom.ll
new file mode 100644
index 00000000000..3e02fc4e8b9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-dom.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @idom_sign_bit_check_edge_dominates(i64 %a) {
+; CHECK-LABEL: @idom_sign_bit_check_edge_dominates(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LOR_RHS:%.*]]
+; CHECK: land.lhs.true:
+; CHECK-NEXT: br label [[LOR_END:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[A]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[LOR_END]], label [[LAND_RHS:%.*]]
+; CHECK: land.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp slt i64 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true:
+ br label %lor.end
+
+lor.rhs:
+ %cmp2 = icmp sgt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs:
+ br label %lor.end
+
+lor.end:
+ ret void
+}
+
+define void @idom_sign_bit_check_edge_not_dominates(i64 %a) {
+; CHECK-LABEL: @idom_sign_bit_check_edge_not_dominates(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LOR_RHS:%.*]]
+; CHECK: land.lhs.true:
+; CHECK-NEXT: br i1 undef, label [[LOR_END:%.*]], label [[LOR_RHS]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[A]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[LAND_RHS:%.*]], label [[LOR_END]]
+; CHECK: land.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp slt i64 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true:
+ br i1 undef, label %lor.end, label %lor.rhs
+
+lor.rhs:
+ %cmp2 = icmp sgt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs:
+ br label %lor.end
+
+lor.end:
+ ret void
+}
+
+define void @idom_sign_bit_check_edge_dominates_select(i64 %a, i64 %b) {
+; CHECK-LABEL: @idom_sign_bit_check_edge_dominates_select(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[A:%.*]], 5
+; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LOR_RHS:%.*]]
+; CHECK: land.lhs.true:
+; CHECK-NEXT: br label [[LOR_END:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[A]], [[B:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOR_END]], label [[LAND_RHS:%.*]]
+; CHECK: land.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp slt i64 %a, 5
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true:
+ br label %lor.end
+
+lor.rhs:
+ %cmp2 = icmp sgt i64 %a, 5
+ %select = select i1 %cmp2, i64 %a, i64 5
+ %cmp3 = icmp ne i64 %select, %b
+ br i1 %cmp3, label %land.rhs, label %lor.end
+
+land.rhs:
+ br label %lor.end
+
+lor.end:
+ ret void
+}
+
+define void @idom_zbranch(i64 %a) {
+; CHECK-LABEL: @idom_zbranch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i64 [[A]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[LAND_RHS:%.*]], label [[LOR_END]]
+; CHECK: land.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i64 %a, 0
+ br i1 %cmp, label %lor.end, label %lor.rhs
+
+lor.rhs:
+ %cmp2 = icmp slt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs:
+ br label %lor.end
+
+lor.end:
+ ret void
+}
+
+define void @idom_not_zbranch(i32 %a, i32 %b) {
+; CHECK-LABEL: @idom_not_zbranch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[A]], [[B:%.*]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[RETURN]], label [[IF_THEN3:%.*]]
+; CHECK: if.then3:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %return, label %if.end
+
+if.end:
+ %cmp1 = icmp slt i32 %a, 0
+ %a. = select i1 %cmp1, i32 %a, i32 0
+ %cmp2 = icmp ne i32 %a., %b
+ br i1 %cmp2, label %if.then3, label %return
+
+if.then3:
+ br label %return
+
+return:
+ ret void
+}
+
+define void @trueblock_cmp_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: @trueblock_cmp_eq(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[RETURN:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A]], 1
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[RETURN]]
+; CHECK: if.then3:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %if.end, label %return
+
+if.end:
+ %cmp1 = icmp slt i32 %a, 2
+ br i1 %cmp1, label %if.then3, label %return
+
+if.then3:
+ br label %return
+
+return:
+ ret void
+}
+
+define i1 @trueblock_cmp_is_false(i32 %x, i32 %y) {
+; CHECK-LABEL: @trueblock_cmp_is_false(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 false
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %cmp = icmp sgt i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ %cmp2 = icmp slt i32 %x, %y
+ ret i1 %cmp2
+f:
+ ret i1 %cmp
+}
+
+define i1 @trueblock_cmp_is_false_commute(i32 %x, i32 %y) {
+; CHECK-LABEL: @trueblock_cmp_is_false_commute(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 false
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ %cmp2 = icmp sgt i32 %y, %x
+ ret i1 %cmp2
+f:
+ ret i1 %cmp
+}
+
+define i1 @trueblock_cmp_is_true(i32 %x, i32 %y) {
+; CHECK-LABEL: @trueblock_cmp_is_true(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 true
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %cmp = icmp ult i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ %cmp2 = icmp ne i32 %x, %y
+ ret i1 %cmp2
+f:
+ ret i1 %cmp
+}
+
+define i1 @trueblock_cmp_is_true_commute(i32 %x, i32 %y) {
+; CHECK-LABEL: @trueblock_cmp_is_true_commute(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 true
+; CHECK: f:
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+entry:
+ %cmp = icmp ugt i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ %cmp2 = icmp ne i32 %y, %x
+ ret i1 %cmp2
+f:
+ ret i1 %cmp
+}
+
+define i1 @falseblock_cmp_is_false(i32 %x, i32 %y) {
+; CHECK-LABEL: @falseblock_cmp_is_false(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %cmp = icmp sle i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ ret i1 %cmp
+f:
+ %cmp2 = icmp slt i32 %x, %y
+ ret i1 %cmp2
+}
+
+define i1 @falseblock_cmp_is_false_commute(i32 %x, i32 %y) {
+; CHECK-LABEL: @falseblock_cmp_is_false_commute(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ ret i1 %cmp
+f:
+ %cmp2 = icmp eq i32 %y, %x
+ ret i1 %cmp2
+}
+
+define i1 @falseblock_cmp_is_true(i32 %x, i32 %y) {
+; CHECK-LABEL: @falseblock_cmp_is_true(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %cmp = icmp ult i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ ret i1 %cmp
+f:
+ %cmp2 = icmp uge i32 %x, %y
+ ret i1 %cmp2
+}
+
+define i1 @falseblock_cmp_is_true_commute(i32 %x, i32 %y) {
+; CHECK-LABEL: @falseblock_cmp_is_true_commute(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: t:
+; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK: f:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %cmp = icmp sgt i32 %x, %y
+ br i1 %cmp, label %t, label %f
+t:
+ ret i1 %cmp
+f:
+ %cmp2 = icmp sge i32 %y, %x
+ ret i1 %cmp2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll
new file mode 100644
index 00000000000..f6f552a32c2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll
@@ -0,0 +1,910 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+
+define i1 @masked_and_notallzeroes(i32 %A) {
+; CHECK-LABEL: @masked_and_notallzeroes(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
+ %mask1 = and i32 %A, 7
+ %tst1 = icmp ne i32 %mask1, 0
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp ne i32 %mask2, 0
+ %res = and i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_or_allzeroes(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
+ %mask1 = and i32 %A, 7
+ %tst1 = icmp eq i32 %mask1, 0
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp eq i32 %mask2, 0
+ %res = or i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_and_notallones(i32 %A) {
+; CHECK-LABEL: @masked_and_notallones(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 7
+; CHECK-NEXT: ret i1 [[TST1]]
+;
+ %mask1 = and i32 %A, 7
+ %tst1 = icmp ne i32 %mask1, 7
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp ne i32 %mask2, 39
+ %res = and i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_or_allones(i32 %A) {
+; CHECK-LABEL: @masked_or_allones(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 7
+; CHECK-NEXT: ret i1 [[TST1]]
+;
+ %mask1 = and i32 %A, 7
+ %tst1 = icmp eq i32 %mask1, 7
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp eq i32 %mask2, 39
+ %res = or i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_and_notA(i32 %A) {
+; CHECK-LABEL: @masked_and_notA(
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78
+; CHECK-NEXT: [[TST2:%.*]] = icmp ne i32 [[MASK2]], [[A]]
+; CHECK-NEXT: ret i1 [[TST2]]
+;
+ %mask1 = and i32 %A, 14
+ %tst1 = icmp ne i32 %mask1, %A
+ %mask2 = and i32 %A, 78
+ %tst2 = icmp ne i32 %mask2, %A
+ %res = and i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_and_notA_slightly_optimized(i32 %A) {
+; CHECK-LABEL: @masked_and_notA_slightly_optimized(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[A:%.*]], 7
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp ne i32 [[MASK2]], [[A]]
+; CHECK-NEXT: [[RES:%.*]] = and i1 [[TMP0]], [[TST2]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %tmp0 = icmp uge i32 %A, 8
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp ne i32 %mask2, %A
+ %res = and i1 %tmp0, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_or_A(i32 %A) {
+; CHECK-LABEL: @masked_or_A(
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A:%.*]], 78
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], [[A]]
+; CHECK-NEXT: ret i1 [[TST2]]
+;
+ %mask1 = and i32 %A, 14
+ %tst1 = icmp eq i32 %mask1, %A
+ %mask2 = and i32 %A, 78
+ %tst2 = icmp eq i32 %mask2, %A
+ %res = or i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_or_A_slightly_optimized(i32 %A) {
+; CHECK-LABEL: @masked_or_A_slightly_optimized(
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[A:%.*]], 8
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], [[A]]
+; CHECK-NEXT: [[RES:%.*]] = or i1 [[TMP0]], [[TST2]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %tmp0 = icmp ult i32 %A, 8
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp eq i32 %mask2, %A
+ %res = or i1 %tmp0, %tst2
+ ret i1 %res
+}
+
+define i1 @masked_or_allzeroes_notoptimised(i32 %A) {
+; CHECK-LABEL: @masked_or_allzeroes_notoptimised(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 0
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 [[A]], 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], 0
+; CHECK-NEXT: [[RES:%.*]] = or i1 [[TST1]], [[TST2]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
+ %mask1 = and i32 %A, 15
+ %tst1 = icmp eq i32 %mask1, 0
+ %mask2 = and i32 %A, 39
+ %tst2 = icmp eq i32 %mask2, 0
+ %res = or i1 %tst1, %tst2
+ ret i1 %res
+}
+
+define i1 @nomask_lhs(i32 %in) {
+; CHECK-LABEL: @nomask_lhs(
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASKED]], 0
+; CHECK-NEXT: ret i1 [[TST2]]
+;
+ %tst1 = icmp eq i32 %in, 0
+ %masked = and i32 %in, 1
+ %tst2 = icmp eq i32 %masked, 0
+ %val = or i1 %tst1, %tst2
+ ret i1 %val
+}
+
+define i1 @nomask_rhs(i32 %in) {
+; CHECK-LABEL: @nomask_rhs(
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[IN:%.*]], 1
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASKED]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
+ %masked = and i32 %in, 1
+ %tst1 = icmp eq i32 %masked, 0
+ %tst2 = icmp eq i32 %in, 0
+ %val = or i1 %tst1, %tst2
+ ret i1 %val
+}
+
+; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify.
+
+define i1 @fold_mask_cmps_to_false(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_false(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 2147483647
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = icmp eq i32 %x, 2147483647
+ %tmp4 = and i1 %tmp3, %tmp2
+ ret i1 %tmp4
+}
+
+; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify.
+
+define i1 @fold_mask_cmps_to_true(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_true(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 2147483647
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = icmp ne i32 %x, 2147483647
+ %tmp4 = or i1 %tmp3, %tmp2
+ ret i1 %tmp4
+}
+
+; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
+
+define i1 @cmpeq_bitwise(i8 %a, i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: @cmpeq_bitwise(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %xor1 = xor i8 %a, %b
+ %xor2 = xor i8 %c, %d
+ %or = or i8 %xor1, %xor2
+ %cmp = icmp eq i8 %or, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @cmpne_bitwise(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
+; CHECK-LABEL: @cmpne_bitwise(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %xor1 = xor <2 x i64> %a, %b
+ %xor2 = xor <2 x i64> %c, %d
+ %or = or <2 x i64> %xor1, %xor2
+ %cmp = icmp ne <2 x i64> %or, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+; ((X & 12) != 0 & (X & 3) == 1) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_0(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 12) != 0 & (X & 7) == 1) -> (X & 15) == 9
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 14) != 0 & (X & 3) == 1) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 14
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 3) != 0 & (X & 7) == 0) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_2(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_2(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 3
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 7) == 0) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 3) == 0) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_3b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_3b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 255) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_4(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_4(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 255
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_5(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_5(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 12) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_6(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_6(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 7) != 0 & (X & 15) == 8) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_7(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 7
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 6) != 0 & (X & 15) == 8) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_7b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_7b(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 6
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 3) != 1) -> !((X & 12) != 0 & (X & 3) == 1) ->
+; no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_0(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 7) != 1) -> !((X & 12) != 0 & (X & 7) == 1) ->
+; !((X & 15) == 9) -> (X & 15) != 9
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 14) == 0 | (X & 3) != 1) -> !((X & 14) != 0 & (X & 3) == 1) ->
+; no change.
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_1b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 14
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 3) == 0 | (X & 7) != 0) -> !((X & 3) != 0 & (X & 7) == 0) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_2(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_2(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 3
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 7) != 0) -> !((X & 15) != 0 & (X & 7) == 0) ->
+; !((X & 15) == 8) -> (X & 15) != 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 3) != 0) -> !((X & 15) != 0 & (X & 3) == 0) ->
+; no change.
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_3b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 255) == 0 | (X & 15) != 8) -> !(((X & 255) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_4(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_4(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 255
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 15) != 8) -> !(((X & 15) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_5(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_5(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 15) != 8) -> !(((X & 12) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_6(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_6(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 7) == 0 | (X & 15) != 8) -> !(((X & 7) != 0 & (X & 15) == 8)) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_7(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 7
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; ((X & 6) == 0 | (X & 15) != 8) -> !(((X & 6) != 0 & (X & 15) == 8)) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_7b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_7b(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 6
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+
+; ((X & 12) != 0 & (X & 3) == 1) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 12) != 0 & (X & 7) == 1) -> (X & 15) == 9
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 14) != 0 & (X & 3) == 1) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_1b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 14
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 1
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 3) != 0 & (X & 7) == 0) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_2(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 3
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 7) == 0) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 3) == 0) -> no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_3b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp eq i32 %tmp3, 0
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 255) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_4(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 255
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_5(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 12) != 0 & (X & 15) == 8) -> (X & 15) == 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_6(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 7) != 0 & (X & 15) == 8) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 7
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 6) != 0 & (X & 15) == 8) -> false
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_swapped_7b(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = and i32 %x, 6
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp eq i32 %tmp3, 8
+ %tmp5 = and i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 3) != 1) -> !((X & 12) != 0 & (X & 3) == 1) ->
+; no change
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 7) != 1) -> !((X & 12) != 0 & (X & 7) == 1) ->
+; !((X & 15) == 9) -> (X & 15) != 9
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 14) == 0 | (X & 3) != 1) -> !((X & 14) != 0 & (X & 3) == 1) ->
+; no change.
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_1b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 14
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 14
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 1
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 3) == 0 | (X & 7) != 0) -> !((X & 3) != 0 & (X & 7) == 0) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_2(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 3
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 7) != 0) -> !((X & 15) != 0 & (X & 7) == 0) ->
+; !((X & 15) == 8) -> (X & 15) != 8
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 7
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 3) != 0) -> !((X & 15) != 0 & (X & 3) == 0) ->
+; no change.
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_3b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 3
+ %tmp4 = icmp ne i32 %tmp3, 0
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 255) == 0 | (X & 15) != 8) -> !(((X & 255) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_4(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 255
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 15) == 0 | (X & 15) != 8) -> !(((X & 15) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_5(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 15
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 12) == 0 | (X & 15) != 8) -> !(((X & 12) != 0 & (X & 15) == 8)) ->
+; !((X & 15) == 8) -> ((X & 15) != 8)
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_6(
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 8
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %tmp1 = and i32 %x, 12
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 7) == 0 | (X & 15) != 8) -> !(((X & 7) != 0 & (X & 15) == 8)) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 7
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
+
+; ((X & 6) == 0 | (X & 15) != 8) -> !(((X & 6) != 0 & (X & 15) == 8)) ->
+; !(false) -> true
+define i1 @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b(i32 %x) {
+; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_negated_swapped_7b(
+; CHECK-NEXT: ret i1 true
+;
+ %tmp1 = and i32 %x, 6
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp3 = and i32 %x, 15
+ %tmp4 = icmp ne i32 %tmp3, 8
+ %tmp5 = or i1 %tmp4, %tmp2
+ ret i1 %tmp5
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
new file mode 100644
index 00000000000..093dfd8308e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-zext.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @sterix(i32, i8, i64) {
+; CHECK-LABEL: @sterix(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[TMP0:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[CONV1]], 1945964878
+; CHECK-NEXT: [[SH_PROM:%.*]] = trunc i64 [[TMP2:%.*]] to i32
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], [[SH_PROM]]
+; CHECK-NEXT: [[CONV2:%.*]] = zext i32 [[SHR]] to i64
+; CHECK-NEXT: [[MUL3:%.*]] = mul nuw nsw i64 [[CONV]], [[CONV2]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[MUL3]], 4294967295
+; CHECK-NEXT: br i1 [[TMP3]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL3]], [[TMP2]]
+; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[AND]] to i32
+; CHECK-NEXT: [[TOBOOL7:%.*]] = icmp eq i32 [[CONV4]], 0
+; CHECK-NEXT: [[PHITMP:%.*]] = zext i1 [[TOBOOL7]] to i32
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[PHITMP]], [[LOR_RHS]] ]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %conv = zext i32 %0 to i64
+ %conv1 = sext i8 %1 to i32
+ %mul = mul i32 %conv1, 1945964878
+ %sh_prom = trunc i64 %2 to i32
+ %shr = lshr i32 %mul, %sh_prom
+ %conv2 = zext i32 %shr to i64
+ %mul3 = mul nuw nsw i64 %conv, %conv2
+ %conv6 = and i64 %mul3, 4294967295
+ %tobool = icmp ne i64 %conv6, %mul3
+ br i1 %tobool, label %lor.end, label %lor.rhs
+
+lor.rhs:
+ %and = and i64 %2, %mul3
+ %conv4 = trunc i64 %and to i32
+ %tobool7 = icmp ne i32 %conv4, 0
+ %lnot = xor i1 %tobool7, true
+ br label %lor.end
+
+lor.end:
+ %3 = phi i1 [ true, %entry ], [ %lnot, %lor.rhs ]
+ %conv8 = zext i1 %3 to i32
+ ret i32 %conv8
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=33765
+
+@glob = external global i16
+
+define void @PR33765(i8 %beth) {
+; CHECK-LABEL: @PR33765(
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
+; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]]
+; CHECK: if.then9:
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[MUL]] to i16
+; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]]
+; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2
+; CHECK-NEXT: ret void
+;
+ %conv = zext i8 %beth to i32
+ %mul = mul nuw nsw i32 %conv, %conv
+ %conv3 = and i32 %mul, 255
+ %tobool8 = icmp ne i32 %mul, %conv3
+ br i1 %tobool8, label %if.then9, label %if.then9
+
+if.then9:
+ %tinky = load i16, i16* @glob
+ %conv13 = sext i16 %tinky to i32
+ %and = and i32 %mul, %conv13
+ %conv14 = trunc i32 %and to i16
+ store i16 %conv14, i16* @glob
+ ret void
+}
+
+; Repro case for bug involving mutating a list while
+; iterating it.
+
+declare i16 @aux(i8)
+
+define i16 @iter_breaker(i16 %a, i16 %b) {
+; CHECK-LABEL: @iter_breaker(
+; CHECK-NEXT: [[UMUL:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 [[A:%.*]], i16 [[B:%.*]])
+; CHECK-NEXT: [[UMUL_VALUE:%.*]] = extractvalue { i16, i1 } [[UMUL]], 0
+; CHECK-NEXT: [[DID_OVF:%.*]] = extractvalue { i16, i1 } [[UMUL]], 1
+; CHECK-NEXT: br i1 [[DID_OVF]], label [[RET1:%.*]], label [[RET2:%.*]]
+; CHECK: ret1:
+; CHECK-NEXT: [[TRUNC_REMAIN:%.*]] = trunc i16 [[UMUL_VALUE]] to i8
+; CHECK-NEXT: [[VAL:%.*]] = call i16 @aux(i8 [[TRUNC_REMAIN]])
+; CHECK-NEXT: ret i16 [[VAL]]
+; CHECK: ret2:
+; CHECK-NEXT: ret i16 [[UMUL_VALUE]]
+;
+ %a_wide = zext i16 %a to i32
+ %b_wide = zext i16 %b to i32
+ %mul_wide = mul i32 %a_wide, %b_wide ; uses of %mul_wide will be iterated
+
+ %trunc_remain = trunc i32 %mul_wide to i8 ; this use will be replaced w/ new value
+ ; when iteration visits it, switching
+ ; iteration to the uses of new value
+
+ %trunc_unnecessary = trunc i32 %mul_wide to i16 ; uses of %trunc_unnecessary will have
+ ; been updated to uses of new value
+
+ %did_ovf = icmp ugt i32 %mul_wide, 65535
+ br i1 %did_ovf, label %ret1, label %ret2
+
+ret1:
+ %val = call i16 @aux(i8 %trunc_remain)
+ ret i16 %val
+
+ret2:
+ ret i16 %trunc_unnecessary ; crash visiting this use after corrupting iterator
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll
new file mode 100644
index 00000000000..d67181028e4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests for slt/ult
+
+define i1 @slt_positive_multip_rem_zero(i8 %x) {
+; CHECK-LABEL: @slt_positive_multip_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, 7
+ %b = icmp slt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @slt_negative_multip_rem_zero(i8 %x) {
+; CHECK-LABEL: @slt_negative_multip_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], -7
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, -7
+ %b = icmp slt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @slt_positive_multip_rem_nz(i8 %x) {
+; CHECK-LABEL: @slt_positive_multip_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp slt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, 5
+ %b = icmp slt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ult_rem_zero(i8 %x) {
+; CHECK-LABEL: @ult_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 7
+ %b = icmp ult i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ult_rem_nz(i8 %x) {
+; CHECK-LABEL: @ult_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp ult i8 %a, 21
+ ret i1 %b
+}
+
+; Tests for sgt/ugt
+
+define i1 @sgt_positive_multip_rem_zero(i8 %x) {
+; CHECK-LABEL: @sgt_positive_multip_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, 7
+ %b = icmp sgt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @sgt_negative_multip_rem_zero(i8 %x) {
+; CHECK-LABEL: @sgt_negative_multip_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], -7
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, -7
+ %b = icmp sgt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @sgt_positive_multip_rem_nz(i8 %x) {
+; CHECK-LABEL: @sgt_positive_multip_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, 5
+ %b = icmp sgt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ugt_rem_zero(i8 %x) {
+; CHECK-LABEL: @ugt_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 7
+ %b = icmp ugt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ugt_rem_nz(i8 %x) {
+; CHECK-LABEL: @ugt_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp ugt i8 %a, 21
+ ret i1 %b
+}
+
+; Tests for eq/ne
+
+define i1 @eq_rem_zero(i8 %x) {
+; CHECK-LABEL: @eq_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 [[A]], 20
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp eq i8 %a, 20
+ ret i1 %b
+}
+
+define i1 @ne_rem_zero(i8 %x) {
+; CHECK-LABEL: @ne_rem_zero(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], 30
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp ne i8 %a, 30
+ ret i1 %b
+}
+
+define i1 @eq_rem_nz(i8 %x) {
+; CHECK-LABEL: @eq_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 [[A]], 31
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp eq i8 %a, 31
+ ret i1 %b
+}
+
+define i1 @ne_rem_nz(i8 %x) {
+; CHECK-LABEL: @ne_rem_nz(
+; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], 31
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nuw i8 %x, 5
+ %b = icmp ne i8 %a, 31
+ ret i1 %b
+}
+
+; Negative tests for the icmp mul folds
+
+define i1 @sgt_positive_multip_rem_zero_nonsw(i8 %x) {
+; CHECK-LABEL: @sgt_positive_multip_rem_zero_nonsw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 7
+ %b = icmp sgt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ult_multip_rem_zero_nonsw(i8 %x) {
+; CHECK-LABEL: @ult_multip_rem_zero_nonsw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 7
+ %b = icmp ult i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @ugt_rem_zero_nonuw(i8 %x) {
+; CHECK-LABEL: @ugt_rem_zero_nonuw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[A]], 21
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 7
+ %b = icmp ugt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @sgt_minnum(i8 %x) {
+; CHECK-LABEL: @sgt_minnum(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], -128
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul nsw i8 %x, 7
+ %b = icmp sgt i8 %a, -128
+ ret i1 %b
+}
+
+define i1 @ule_bignum(i8 %x) {
+; CHECK-LABEL: @ule_bignum(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 2147483647
+ %b = icmp ule i8 %a, 0
+ ret i1 %b
+}
+
+define i1 @sgt_mulzero(i8 %x) {
+; CHECK-LABEL: @sgt_mulzero(
+; CHECK-NEXT: ret i1 false
+;
+ %a = mul nsw i8 %x, 0
+ %b = icmp sgt i8 %a, 21
+ ret i1 %b
+}
+
+define i1 @eq_rem_zero_nonuw(i8 %x) {
+; CHECK-LABEL: @eq_rem_zero_nonuw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp eq i8 [[A]], 20
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 5
+ %b = icmp eq i8 %a, 20
+ ret i1 %b
+}
+
+define i1 @ne_rem_zero_nonuw(i8 %x) {
+; CHECK-LABEL: @ne_rem_zero_nonuw(
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5
+; CHECK-NEXT: [[B:%.*]] = icmp ne i8 [[A]], 30
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = mul i8 %x, 5
+ %b = icmp ne i8 %a, 30
+ ret i1 %b
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll
new file mode 100644
index 00000000000..f035683170e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -0,0 +1,150 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These should be InstSimplify checks, but most of the code
+; is currently only in InstCombine. TODO: move supporting code
+
+; Definitely out of range
+define i1 @test_nonzero(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+define i1 @test_nonzero2(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero2
+; CHECK: ret i1 false
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp eq i32 %val, 0
+ ret i1 %rval
+}
+
+; Potentially in range
+define i1 @test_nonzero3(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero3
+; Check that this does not trigger - it wouldn't be legal
+; CHECK: icmp
+ %val = load i32, i32* %arg, !range !1
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+
+; Definitely in range
+define i1 @test_nonzero4(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero4
+; CHECK: ret i1 false
+ %val = load i8, i8* %arg, !range !2
+ %rval = icmp ne i8 %val, 0
+ ret i1 %rval
+}
+
+define i1 @test_nonzero5(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero5
+; CHECK: ret i1 false
+ %val = load i8, i8* %arg, !range !2
+ %rval = icmp ugt i8 %val, 0
+ ret i1 %rval
+}
+
+; Cheaper checks (most values in range meet requirements)
+define i1 @test_nonzero6(i8* %argw) {
+; CHECK-LABEL: test_nonzero6
+; CHECK: icmp ne i8 %val, 0
+ %val = load i8, i8* %argw, !range !3
+ %rval = icmp sgt i8 %val, 0
+ ret i1 %rval
+}
+
+; Constant not in range, should return true.
+define i1 @test_not_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_not_in_range
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 6
+ ret i1 %rval
+}
+
+; Constant in range, cannot fold.
+define i1 @test_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_in_range
+; CHECK: icmp ne i32 %val, 3
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 3
+ ret i1 %rval
+}
+
+; Values in range greater than constant.
+define i1 @test_range_sgt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_sgt_constant
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 0
+ ret i1 %rval
+}
+
+; Values in range less than constant.
+define i1 @test_range_slt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_slt_constant
+; CHECK: ret i1 false
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 6
+ ret i1 %rval
+}
+
+; Values in the union of multiple sub ranges are not equal to the constant.
+define i1 @test_multi_range1(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range1
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+
+; Values in the multiple sub ranges are not equal to the constant, but a
+; value in the union of the sub ranges could possibly be equal to the
+; constant. In theory this could also be folded and might be implemented
+; in the future if shown profitable in practice.
+define i1 @test_multi_range2(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range2
+; CHECK: icmp ne i32 %val, 7
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 7
+ ret i1 %rval
+}
+
+; Values' ranges overlap each other, so it cannot be simplified.
+define i1 @test_two_ranges(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges
+; CHECK: icmp ult i32 %val2, %val1
+ %val1 = load i32, i32* %arg1, !range !5
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can be simplified to false.
+define i1 @test_two_ranges2(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges2
+; CHECK: ret i1 false
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can be simplified to true.
+define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges3
+; CHECK: ret i1 true
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ugt i32 %val2, %val1
+ ret i1 %rval
+}
+
+!0 = !{i32 1, i32 6}
+!1 = !{i32 0, i32 6}
+!2 = !{i8 0, i8 1}
+!3 = !{i8 0, i8 6}
+!4 = !{i32 1, i32 6, i32 8, i32 10}
+!5 = !{i32 5, i32 10}
+!6 = !{i32 8, i32 16}
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
new file mode 100644
index 00000000000..ba05302897e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If the (shl x, C) preserved the sign and this is a sign test,
+; compare the LHS operand instead
+
+define i1 @icmp_shl_nsw_sgt(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sgt i32 %shl, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_shl_nsw_sge0(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sge0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sge i32 %shl, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_shl_nsw_sge1(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sge1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sge i32 %shl, 1
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_nsw_sge1_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sge1_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> %x, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl nsw <2 x i32> %x, <i32 21, i32 21>
+ %cmp = icmp sge <2 x i32> %shl, <i32 1, i32 1>
+ ret <2 x i1> %cmp
+}
+
+; Checks for icmp (eq|ne) (shl x, C), 0
+
+define i1 @icmp_shl_nsw_eq(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = shl nsw i32 %x, 5
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_nsw_eq_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_nsw_eq_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> %x, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = shl nsw <2 x i32> %x, <i32 5, i32 5>
+ %cmp = icmp eq <2 x i32> %mul, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+; icmp sgt with shl nsw with a constant compare operand and constant
+; shift amount can always be reduced to icmp sgt alone.
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sgt1(i8 %x) {
+; CHECK-LABEL: @icmp_sgt1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %x, -64
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, -128
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt2(i8 %x) {
+; CHECK-LABEL: @icmp_sgt2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -64
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, -127
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt3(i8 %x) {
+; CHECK-LABEL: @icmp_sgt3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -8
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, -16
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt4(i8 %x) {
+; CHECK-LABEL: @icmp_sgt4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, -2
+ ret i1 %cmp
+}
+
+; x >s -1 is a sign bit test.
+; x >s 0 is a sign bit test.
+
+define i1 @icmp_sgt5(i8 %x) {
+; CHECK-LABEL: @icmp_sgt5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, 1
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt6(i8 %x) {
+; CHECK-LABEL: @icmp_sgt6(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, 8
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, 16
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt7(i8 %x) {
+; CHECK-LABEL: @icmp_sgt7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, 62
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, 124
+ ret i1 %cmp
+}
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sgt8(i8 %x) {
+; CHECK-LABEL: @icmp_sgt8(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %x, 63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sgt i8 %shl, 125
+ ret i1 %cmp
+}
+
+; Compares with 126 and 127 are recognized as always false.
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sgt9(i8 %x) {
+; CHECK-LABEL: @icmp_sgt9(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sgt i8 %shl, -128
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt10(i8 %x) {
+; CHECK-LABEL: @icmp_sgt10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sgt i8 %shl, -127
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt11(i8 %x) {
+; CHECK-LABEL: @icmp_sgt11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sgt i8 %shl, -2
+ ret i1 %cmp
+}
+
+; Splat vector version should fold the same way.
+
+define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) {
+; CHECK-LABEL: @icmp_sgt11_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl nsw <2 x i8> %x, <i8 7, i8 7>
+ %cmp = icmp sgt <2 x i8> %shl, <i8 -2, i8 -2>
+ ret <2 x i1> %cmp
+}
+
+; Known bits analysis returns false for compares with >=0.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Repeat the shl nsw + sgt tests with predicate changed to 'sle'.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle1(i8 %x) {
+; CHECK-LABEL: @icmp_sle1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %x, -64
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, -128
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle2(i8 %x) {
+; CHECK-LABEL: @icmp_sle2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, -63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, -127
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle3(i8 %x) {
+; CHECK-LABEL: @icmp_sle3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, -7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, -16
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle4(i8 %x) {
+; CHECK-LABEL: @icmp_sle4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, -2
+ ret i1 %cmp
+}
+
+; x <=s -1 is a sign bit test.
+; x <=s 0 is a sign bit test.
+
+define i1 @icmp_sle5(i8 %x) {
+; CHECK-LABEL: @icmp_sle5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, 1
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle6(i8 %x) {
+; CHECK-LABEL: @icmp_sle6(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 9
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, 16
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle7(i8 %x) {
+; CHECK-LABEL: @icmp_sle7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, 124
+ ret i1 %cmp
+}
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle8(i8 %x) {
+; CHECK-LABEL: @icmp_sle8(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %x, 63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp sle i8 %shl, 125
+ ret i1 %cmp
+}
+
+; Compares with 126 and 127 are recognized as always true.
+
+; Known bits analysis turns this into an equality predicate.
+
+define i1 @icmp_sle9(i8 %x) {
+; CHECK-LABEL: @icmp_sle9(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sle i8 %shl, -128
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle10(i8 %x) {
+; CHECK-LABEL: @icmp_sle10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sle i8 %shl, -127
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle11(i8 %x) {
+; CHECK-LABEL: @icmp_sle11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 7
+ %cmp = icmp sle i8 %shl, -2
+ ret i1 %cmp
+}
+
+; Some of the earlier sgt/sle tests are transformed to eq/ne, but try a couple
+; of those explicitly, so we know no intermediate transforms are necessary.
+
+define i1 @icmp_eq1(i8 %x) {
+; CHECK-LABEL: @icmp_eq1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %x, 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 1
+ %cmp = icmp eq i8 %shl, 12
+ ret i1 %cmp
+}
+
+define i1 @icmp_ne1(i8 %x) {
+; CHECK-LABEL: @icmp_ne1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %x, -2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nsw i8 %x, 6
+ %cmp = icmp ne i8 %shl, -128
+ ret i1 %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
new file mode 100644
index 00000000000..4d85c095c44
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+define i1 @icmp_ugt_32(i64) {
+; CHECK-LABEL: @icmp_ugt_32(
+; CHECK-NEXT: [[D:%.*]] = icmp ne i64 %0, 0
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %c = shl nuw i64 %0, 32
+ %d = icmp ugt i64 %c, 4294967295
+ ret i1 %d
+}
+
+define i1 @icmp_ule_64(i128) {
+; CHECK-LABEL: @icmp_ule_64(
+; CHECK-NEXT: [[D:%.*]] = icmp eq i128 %0, 0
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %c = shl nuw i128 %0, 64
+ %d = icmp ule i128 %c, 18446744073709551615
+ ret i1 %d
+}
+
+define i1 @icmp_ugt_16(i64) {
+; CHECK-LABEL: @icmp_ugt_16(
+; CHECK-NEXT: [[D:%.*]] = icmp ugt i64 %0, 15
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %c = shl nuw i64 %0, 16
+ %d = icmp ugt i64 %c, 1048575 ; 0x0f_ffff
+ ret i1 %d
+}
+
+define <2 x i1> @icmp_ule_16x2(<2 x i64>) {
+; CHECK-LABEL: @icmp_ule_16x2(
+; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i64> %0, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = shl nuw <2 x i64> %0, <i64 16, i64 16>
+ %d = icmp ule <2 x i64> %c, <i64 65535, i64 65535>
+ ret <2 x i1> %d
+}
+
+define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) {
+; CHECK-LABEL: @icmp_ule_16x2_nonzero(
+; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> %0, <i64 4, i64 4>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = shl nuw <2 x i64> %0, <i64 16, i64 16>
+ %d = icmp ule <2 x i64> %c, <i64 196608, i64 196608> ; 0x03_0000
+ ret <2 x i1> %d
+}
+
+define <2 x i1> @icmp_ule_12x2(<2 x i64>) {
+; CHECK-LABEL: @icmp_ule_12x2(
+; CHECK-NEXT: [[D:%.*]] = icmp ult <2 x i64> %0, <i64 4, i64 4>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = shl nuw <2 x i64> %0, <i64 12, i64 12>
+ %d = icmp ule <2 x i64> %c, <i64 12288, i64 12288> ; 0x3000
+ ret <2 x i1> %d
+}
+
+define i1 @icmp_ult_8(i64) {
+; CHECK-LABEL: @icmp_ult_8(
+; CHECK-NEXT: [[D:%.*]] = icmp ult i64 %0, 16
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %c = shl nuw i64 %0, 8
+ %d = icmp ult i64 %c, 4095 ; 0x0fff
+ ret i1 %d
+}
+
+define <2 x i1> @icmp_uge_8x2(<2 x i16>) {
+; CHECK-LABEL: @icmp_uge_8x2(
+; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i16> %0, <i16 15, i16 15>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = shl nuw <2 x i16> %0, <i16 8, i16 8>
+ %d = icmp uge <2 x i16> %c, <i16 4095, i16 4095>
+ ret <2 x i1> %d
+}
+
+define <2 x i1> @icmp_ugt_16x2(<2 x i32>) {
+; CHECK-LABEL: @icmp_ugt_16x2(
+; CHECK-NEXT: [[D:%.*]] = icmp ugt <2 x i32> %0, <i32 15, i32 15>
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %c = shl nuw <2 x i32> %0, <i32 16, i32 16>
+ %d = icmp ugt <2 x i32> %c, <i32 1048575, i32 1048575>
+ ret <2 x i1> %d
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
new file mode 100644
index 00000000000..bf1a031a412
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -0,0 +1,3546 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
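+; Test names encode the shift amount and the compare constant: for example,
+; @lshrugt_01_02 checks (icmp ugt (lshr i4 %x, 1), 2). The i4 type keeps the
+; exhaustive sweep over shift amounts and compare constants small.
+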
+define i1 @lshrugt_01_00(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_00(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_01(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_01(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_02(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_02(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_03(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_03(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_04(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_04(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -7
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_05(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_05(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_06(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_06(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_07(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_08(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_09(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_10(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_11(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_12(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_13(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_13(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_14(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_14(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_15(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_15(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_00(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_00(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_01(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_01(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_02(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_02(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_03(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_03(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_04(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_04(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_05(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_05(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_06(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_06(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_07(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_08(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_09(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_10(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_11(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_12(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_13(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_13(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_14(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_14(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_15(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_15(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_00(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_00(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_01(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_01(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_02(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_02(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_03(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_03(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_04(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_04(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_05(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_05(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_06(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_06(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_07(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_08(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_09(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_10(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_11(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_12(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_13(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_13(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_14(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_14(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_15(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_15(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_01_00(i4 %x) {
+; CHECK-LABEL: @lshrult_01_00(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_01_01(i4 %x) {
+; CHECK-LABEL: @lshrult_01_01(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_01_02(i4 %x) {
+; CHECK-LABEL: @lshrult_01_02(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_01_03(i4 %x) {
+; CHECK-LABEL: @lshrult_01_03(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_01_04(i4 %x) {
+; CHECK-LABEL: @lshrult_01_04(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_01_05(i4 %x) {
+; CHECK-LABEL: @lshrult_01_05(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_01_06(i4 %x) {
+; CHECK-LABEL: @lshrult_01_06(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_01_07(i4 %x) {
+; CHECK-LABEL: @lshrult_01_07(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_01_08(i4 %x) {
+; CHECK-LABEL: @lshrult_01_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_01_09(i4 %x) {
+; CHECK-LABEL: @lshrult_01_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_01_10(i4 %x) {
+; CHECK-LABEL: @lshrult_01_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_01_11(i4 %x) {
+; CHECK-LABEL: @lshrult_01_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_01_12(i4 %x) {
+; CHECK-LABEL: @lshrult_01_12(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_01_13(i4 %x) {
+; CHECK-LABEL: @lshrult_01_13(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_01_14(i4 %x) {
+; CHECK-LABEL: @lshrult_01_14(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_01_15(i4 %x) {
+; CHECK-LABEL: @lshrult_01_15(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 1
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_02_00(i4 %x) {
+; CHECK-LABEL: @lshrult_02_00(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_02_01(i4 %x) {
+; CHECK-LABEL: @lshrult_02_01(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_02_02(i4 %x) {
+; CHECK-LABEL: @lshrult_02_02(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_02_03(i4 %x) {
+; CHECK-LABEL: @lshrult_02_03(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_02_04(i4 %x) {
+; CHECK-LABEL: @lshrult_02_04(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_02_05(i4 %x) {
+; CHECK-LABEL: @lshrult_02_05(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_02_06(i4 %x) {
+; CHECK-LABEL: @lshrult_02_06(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_02_07(i4 %x) {
+; CHECK-LABEL: @lshrult_02_07(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_02_08(i4 %x) {
+; CHECK-LABEL: @lshrult_02_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_02_09(i4 %x) {
+; CHECK-LABEL: @lshrult_02_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_02_10(i4 %x) {
+; CHECK-LABEL: @lshrult_02_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_02_11(i4 %x) {
+; CHECK-LABEL: @lshrult_02_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_02_12(i4 %x) {
+; CHECK-LABEL: @lshrult_02_12(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_02_13(i4 %x) {
+; CHECK-LABEL: @lshrult_02_13(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_02_14(i4 %x) {
+; CHECK-LABEL: @lshrult_02_14(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_02_15(i4 %x) {
+; CHECK-LABEL: @lshrult_02_15(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 2
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_03_00(i4 %x) {
+; CHECK-LABEL: @lshrult_03_00(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_03_01(i4 %x) {
+; CHECK-LABEL: @lshrult_03_01(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_03_02(i4 %x) {
+; CHECK-LABEL: @lshrult_03_02(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_03_03(i4 %x) {
+; CHECK-LABEL: @lshrult_03_03(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_03_04(i4 %x) {
+; CHECK-LABEL: @lshrult_03_04(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_03_05(i4 %x) {
+; CHECK-LABEL: @lshrult_03_05(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_03_06(i4 %x) {
+; CHECK-LABEL: @lshrult_03_06(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_03_07(i4 %x) {
+; CHECK-LABEL: @lshrult_03_07(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_03_08(i4 %x) {
+; CHECK-LABEL: @lshrult_03_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_03_09(i4 %x) {
+; CHECK-LABEL: @lshrult_03_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_03_10(i4 %x) {
+; CHECK-LABEL: @lshrult_03_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_03_11(i4 %x) {
+; CHECK-LABEL: @lshrult_03_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_03_12(i4 %x) {
+; CHECK-LABEL: @lshrult_03_12(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_03_13(i4 %x) {
+; CHECK-LABEL: @lshrult_03_13(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_03_14(i4 %x) {
+; CHECK-LABEL: @lshrult_03_14(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_03_15(i4 %x) {
+; CHECK-LABEL: @lshrult_03_15(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr i4 %x, 3
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_00(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_00(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_01(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_01(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_02(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_02(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_03(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_03(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_04(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_04(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_05(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_05(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_06(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_06(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_07(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_08(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_09(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_10(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_11(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_12(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_12(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -7
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_13(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_13(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_14(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_14(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_15(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_15(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_00(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_00(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 3
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_01(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_01(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_02(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_02(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_03(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_03(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_04(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_04(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_05(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_05(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_06(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_06(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_07(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_08(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_09(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_10(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_11(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_12(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_12(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_13(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_13(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_14(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_14(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_15(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_15(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_00(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_00(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_01(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_01(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_02(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_02(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_03(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_03(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_04(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_04(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_05(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_05(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_06(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_06(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_07(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_07(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_08(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_08(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_09(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_09(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_10(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_10(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_11(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_11(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_12(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_12(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_13(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_13(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_14(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_14(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_15(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_15(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 3
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_00(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_00(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_01(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_01(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_02(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_02(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_03(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_03(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_04(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_04(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_05(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_05(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_06(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_06(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_07(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_07(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_08(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_09(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_10(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_11(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_12(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_13(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_13(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_14(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_14(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_15(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_15(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 1
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_00(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_00(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_01(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_01(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_02(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_02(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_03(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_03(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_04(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_04(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_05(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_05(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_06(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_06(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_07(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_07(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_08(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_09(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_10(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_11(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_12(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_13(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_13(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_14(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_14(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_15(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_15(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 2
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_00(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_00(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_01(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_01(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_02(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_02(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_03(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_03(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_04(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_04(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_05(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_05(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_06(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_06(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_07(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_07(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_08(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_08(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_09(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_09(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_10(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_10(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_11(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_11(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_12(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_12(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_13(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_13(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_14(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_14(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_15(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_15(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr i4 %x, 3
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
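+; The same patterns are repeated with the 'exact' flag on the shift. Because
+; 'exact' guarantees that no non-zero bits are shifted out, these cases can
+; fold to tighter compares (including eq/ne of the original value).
+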
+define i1 @lshrugt_01_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_03_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_04_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_05_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_06_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, -2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_13_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_14_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_01_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_01_15_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_03_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_04_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_05_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_06_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_13_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_14_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_02_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_02_15_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_01_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_02_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_03_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_04_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_05_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_06_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_13_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_14_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrugt_03_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrugt_03_15_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ugt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_01_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_00_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_01_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_01_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_01_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_03_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, 6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_01_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_04_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_01_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_05_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_01_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_06_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_01_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_07_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_01_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_01_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_01_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_01_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_01_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_12_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_01_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_13_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_01_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_14_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_01_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_01_15_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 1
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_02_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_00_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_02_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_02_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_02_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_03_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_02_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_04_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_02_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_05_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_02_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_06_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_02_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_07_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_02_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_02_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_02_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_02_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_02_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_12_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_02_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_13_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_02_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_14_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_02_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_02_15_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 2
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @lshrult_03_00_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_00_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @lshrult_03_01_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @lshrult_03_02_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_02_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @lshrult_03_03_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_03_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @lshrult_03_04_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_04_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @lshrult_03_05_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_05_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @lshrult_03_06_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_06_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @lshrult_03_07_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_07_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @lshrult_03_08_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @lshrult_03_09_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @lshrult_03_10_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @lshrult_03_11_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @lshrult_03_12_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_12_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @lshrult_03_13_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_13_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @lshrult_03_14_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_14_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @lshrult_03_15_exact(i4 %x) {
+; CHECK-LABEL: @lshrult_03_15_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = lshr exact i4 %x, 3
+ %c = icmp ult i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_03_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_04_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_05_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_06_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_12_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_13_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_14_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_01_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_01_15_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_01_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_02_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_03_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_04_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_05_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_06_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_12_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_13_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_14_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i4 %x, -8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_02_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_02_15_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_00_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_01_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_02_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_03_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_04_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_05_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_06_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_07_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_08_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_09_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_10_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_11_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_12_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_13_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_14_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrsgt_03_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrsgt_03_15_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 %x, -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp sgt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_02_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_03_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_04_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_05_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_06_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_07_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_13_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -6
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_14_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_01_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_01_15_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 1
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_01_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_02_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_03_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_04_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_05_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_06_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_07_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_13_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_14_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_02_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_02_15_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, -4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 2
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_00_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_00_exact(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i4 %x, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 0
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_01_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_01_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 1
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_02_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_02_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 2
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_03_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_03_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 3
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_04_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_04_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 4
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_05_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_05_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 5
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_06_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_06_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 6
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_07_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_07_exact(
+; CHECK-NEXT: ret i1 true
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 7
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_08_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_08_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 8
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_09_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_09_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 9
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_10_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_10_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 10
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_11_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_11_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 11
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_12_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_12_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 12
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_13_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_13_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 13
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_14_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_14_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 14
+ ret i1 %c
+}
+
+define i1 @ashrslt_03_15_exact(i4 %x) {
+; CHECK-LABEL: @ashrslt_03_15_exact(
+; CHECK-NEXT: ret i1 false
+;
+ %s = ashr exact i4 %x, 3
+ %c = icmp slt i4 %s, 15
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
new file mode 100644
index 00000000000..214f315f317
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
+; CHECK-LABEL: @lshr_eq_msb_low_last_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %a, 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) {
+; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %a, <i8 6, i8 6>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shr = lshr <2 x i8> <i8 127, i8 127>, %a
+ %cmp = icmp eq <2 x i8> %shr, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
+; CHECK-LABEL: @ashr_eq_msb_low_second_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %a, 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
+; CHECK-LABEL: @lshr_ne_msb_low_last_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
+; CHECK-LABEL: @ashr_ne_msb_low_second_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+define i1 @ashr_eq_both_equal(i8 %a) {
+; CHECK-LABEL: @ashr_eq_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+define i1 @ashr_ne_both_equal(i8 %a) {
+; CHECK-LABEL: @ashr_ne_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+define i1 @lshr_eq_both_equal(i8 %a) {
+; CHECK-LABEL: @lshr_eq_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 127
+ ret i1 %cmp
+}
+
+define i1 @lshr_ne_both_equal(i8 %a) {
+; CHECK-LABEL: @lshr_ne_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 127
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_eq_both_equal(i8 %a) {
+; CHECK-LABEL: @exact_ashr_eq_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_ne_both_equal(i8 %a) {
+; CHECK-LABEL: @exact_ashr_ne_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_eq_both_equal(i8 %a) {
+; CHECK-LABEL: @exact_lshr_eq_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp eq i8 %shr, 126
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_ne_both_equal(i8 %a) {
+; CHECK-LABEL: @exact_lshr_ne_both_equal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp ne i8 %shr, 126
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_eq_opposite_msb(i8 %a) {
+; CHECK-LABEL: @exact_lshr_eq_opposite_msb(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @lshr_eq_opposite_msb(i8 %a) {
+; CHECK-LABEL: @lshr_eq_opposite_msb(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_ne_opposite_msb(i8 %a) {
+; CHECK-LABEL: @exact_lshr_ne_opposite_msb(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @lshr_ne_opposite_msb(i8 %a) {
+; CHECK-LABEL: @lshr_ne_opposite_msb(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_eq(i8 %a) {
+; CHECK-LABEL: @exact_ashr_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_ne(i8 %a) {
+; CHECK-LABEL: @exact_ashr_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_eq(i8 %a) {
+; CHECK-LABEL: @exact_lshr_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_ne(i8 %a) {
+; CHECK-LABEL: @exact_lshr_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_eq(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_ne(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 7
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_eq(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_ne(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_eq_exactdiv(i8 %a) {
+; CHECK-LABEL: @exact_lshr_eq_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_ne_exactdiv(i8 %a) {
+; CHECK-LABEL: @exact_lshr_ne_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_eq_exactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_eq_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_ne_exactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_ne_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_eq_exactdiv(i8 %a) {
+; CHECK-LABEL: @exact_ashr_eq_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_ne_exactdiv(i8 %a) {
+; CHECK-LABEL: @exact_ashr_ne_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_eq_exactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_eq_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_ne_exactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_ne_exactdiv(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 %a, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_eq_noexactdiv(i8 %a) {
+; CHECK-LABEL: @exact_lshr_eq_noexactdiv(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+define i1 @exact_lshr_ne_noexactdiv(i8 %a) {
+; CHECK-LABEL: @exact_lshr_ne_noexactdiv(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_eq_noexactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_eq_noexactdiv(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_ne_noexactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_ne_noexactdiv(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_eq_noexactdiv(i8 %a) {
+; CHECK-LABEL: @exact_ashr_eq_noexactdiv(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+define i1 @exact_ashr_ne_noexactdiv(i8 %a) {
+; CHECK-LABEL: @exact_ashr_ne_noexactdiv(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_eq_noexactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_eq_noexactdiv(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_ne_noexactdiv(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_ne_noexactdiv(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_eq_noexactlog(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_eq_noexactlog(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+define i1 @nonexact_lshr_ne_noexactlog(i8 %a) {
+; CHECK-LABEL: @nonexact_lshr_ne_noexactlog(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_eq_noexactlog(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_eq_noexactlog(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+define i1 @nonexact_ashr_ne_noexactlog(i8 %a) {
+; CHECK-LABEL: @nonexact_ashr_ne_noexactlog(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; Don't try to fold the entire body of function @PR20945 into a
+; single `ret i1 true` statement.
+; If %B is equal to 1, then %shr is -9 ashr 1, i.e. -5, and this function
+; would return false. As a consequence, the instruction combiner is not
+; allowed to fold %cmp to 'true'. Instead, it should replace %cmp with a
+; simpler comparison between %B and 1.
+
+define i1 @PR20945(i32 %B) {
+; CHECK-LABEL: @PR20945(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %B, 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i32 -9, %B
+ %cmp = icmp ne i32 %shr, -5
+ ret i1 %cmp
+}
+
+define i1 @PR21222(i32 %B) {
+; CHECK-LABEL: @PR21222(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %B, 6
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = ashr i32 -93, %B
+ %cmp = icmp eq i32 %shr, -2
+ ret i1 %cmp
+}
+
+define i1 @PR24873(i64 %V) {
+; CHECK-LABEL: @PR24873(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 %V, 61
+; CHECK-NEXT: ret i1 [[ICMP]]
+;
+ %ashr = ashr i64 -4611686018427387904, %V
+ %icmp = icmp eq i64 %ashr, -1
+ ret i1 %icmp
+}
+
+declare void @foo(i32)
+
+define i1 @exact_multiuse(i32 %x) {
+; CHECK-LABEL: @exact_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = lshr exact i32 %x, 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 131072
+; CHECK-NEXT: call void @foo(i32 [[SH]])
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sh = lshr exact i32 %x, 7
+ %cmp = icmp eq i32 %sh, 1024
+ call void @foo(i32 %sh)
+ ret i1 %cmp
+}
+
+declare void @foo2(<2 x i32>)
+define <2 x i1> @exact_eq0_multiuse(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @exact_eq0_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = ashr exact <2 x i32> %x, %y
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[SH]], zeroinitializer
+; CHECK-NEXT: call void @foo2(<2 x i32> [[SH]])
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sh = ashr exact <2 x i32> %x, %y
+ %cmp = icmp eq <2 x i32> %sh, zeroinitializer
+ call void @foo2(<2 x i32> %sh)
+ ret <2 x i1> %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll
new file mode 100644
index 00000000000..c66581b3b5a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test_nuw_and_unsigned_pred(i64 %x) {
+; CHECK-LABEL: @test_nuw_and_unsigned_pred(
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt i64 [[X:%.*]], 7
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nuw i64 10, %x
+ %z = icmp ult i64 %y, 3
+ ret i1 %z
+}
+
+define i1 @test_nsw_and_signed_pred(i64 %x) {
+; CHECK-LABEL: @test_nsw_and_signed_pred(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i64 [[X:%.*]], -7
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nsw i64 3, %x
+ %z = icmp sgt i64 %y, 10
+ ret i1 %z
+}
+
+define i1 @test_nuw_nsw_and_unsigned_pred(i64 %x) {
+; CHECK-LABEL: @test_nuw_nsw_and_unsigned_pred(
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt i64 [[X:%.*]], 6
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nuw nsw i64 10, %x
+ %z = icmp ule i64 %y, 3
+ ret i1 %z
+}
+
+define i1 @test_nuw_nsw_and_signed_pred(i64 %x) {
+; CHECK-LABEL: @test_nuw_nsw_and_signed_pred(
+; CHECK-NEXT: [[Z:%.*]] = icmp sgt i64 [[X:%.*]], 7
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nuw nsw i64 10, %x
+ %z = icmp slt i64 %y, 3
+ ret i1 %z
+}
+
+define i1 @test_negative_nuw_and_signed_pred(i64 %x) {
+; CHECK-LABEL: @test_negative_nuw_and_signed_pred(
+; CHECK-NEXT: [[Y:%.*]] = sub nuw i64 10, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i64 [[Y]], 3
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nuw i64 10, %x
+ %z = icmp slt i64 %y, 3
+ ret i1 %z
+}
+
+define i1 @test_negative_nsw_and_unsigned_pred(i64 %x) {
+; CHECK-LABEL: @test_negative_nsw_and_unsigned_pred(
+; CHECK-NEXT: [[Y:%.*]] = sub nsw i64 10, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ult i64 [[Y]], 3
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nsw i64 10, %x
+ %z = icmp ult i64 %y, 3
+ ret i1 %z
+}
+
+define i1 @test_negative_combined_sub_unsigned_overflow(i64 %x) {
+; CHECK-LABEL: @test_negative_combined_sub_unsigned_overflow(
+; CHECK-NEXT: [[Y:%.*]] = sub nuw i64 10, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ult i64 [[Y]], 11
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nuw i64 10, %x
+ %z = icmp ult i64 %y, 11
+ ret i1 %z
+}
+
+define i1 @test_negative_combined_sub_signed_overflow(i8 %x) {
+; CHECK-LABEL: @test_negative_combined_sub_signed_overflow(
+; CHECK-NEXT: [[Y:%.*]] = sub nsw i8 127, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i8 [[Y]], -1
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = sub nsw i8 127, %x
+ %z = icmp slt i8 %y, -1
+ ret i1 %z
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..a3d9cca7999
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; ((1 << bits)+(-1)) u>= val
+; Should be transformed into:
+; (val l>> bits) == 0
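+; For a concrete sketch with bits = 3 (i8): the mask is (1 << 3) - 1 = 7,
+; so val = 7 gives 7 u>= 7 (true) and 7 l>> 3 == 0 (true), while val = 8
+; gives 7 u>= 8 (false) and 8 l>> 3 = 1 (nonzero), so both forms agree.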
+
+; NOTE: the innermost shl is not one-use; otherwise, canonicalization happens.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef1(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef2(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ule i8 %val, %t1 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS0:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T2:%.*]] = shl i8 1, [[BITS1:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = add i8 [[T2]], -1
+; CHECK-NEXT: [[T3_HIGHBITS:%.*]] = lshr i8 [[T3]], [[BITS0]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[T3_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits0
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = shl i8 1, %bits1
+ call void @use8(i8 %t2)
+ %t3 = add i8 %t2, -1
+ %r = icmp uge i8 %t1, %t3
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+define i1 @oneuse(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0) ; this is needed anyway
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits ; constant is not 1
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @n1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, 1 ; constant is not -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n2_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 -1>, %bits ; again, wrong constant
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 1>
+; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 1> ; again, wrong constant
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n3(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp ugt i8 %t1, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n4(i8 %bits) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[VAL]], [[T1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ult i8 %val, %t1 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..a326846332d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; ~(-1 << bits) u>= val
+; Should be transformed into:
+; (val l>> bits) == 0
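+; For a concrete sketch with bits = 3 (i8): ~(-1 << 3) = ~0xF8 = 7,
+; so val = 7 gives 7 u>= 7 (true) and 7 l>> 3 == 0 (true), while val = 8
+; gives 7 u>= 8 (false) and 8 l>> 3 = 1 (nonzero), so both forms agree.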
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %bits
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef1(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef2(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp uge <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ule i8 %val, %t1 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = shl i8 -1, [[BITS1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T2]], [[T0]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits0
+ %t1 = xor i8 %t0, -1
+ %t2 = shl i8 -1, %bits1
+ %t3 = xor i8 %t2, -1
+ %r = icmp uge i8 %t1, %t3
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @oneuse1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @oneuse2(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits ; constant is not -1
+ %t1 = xor i8 %t0, -1
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @n1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, 1 ; not 'not'
+ %r = icmp uge i8 %t1, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n2_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 1>, %bits ; again, wrong constant
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 1>
+; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %bits
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 1> ; again, wrong constant
+ %r = icmp uge <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n3(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %r = icmp ugt i8 %t1, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n4(i8 %bits) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[VAL]], [[T1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ult i8 %val, %t1 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..07576677f58
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; (1 << bits) u> val
+; Should be transformed into:
+; (val l>> bits) == 0
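+; For a concrete sketch with bits = 3 (i8): 1 << 3 = 8, so val = 7 gives
+; 8 u> 7 (true) and 7 l>> 3 == 0 (true), while val = 8 gives 8 u> 8 (false)
+; and 8 l>> 3 = 1 (nonzero), so both forms agree.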
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %r = icmp ugt i8 %t0, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ %r = icmp ugt <2 x i8> %t0, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ %r = icmp ugt <3 x i8> %t0, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %val = call i8 @gen8()
+ %r = icmp ult i8 %val, %t0 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T1:%.*]] = shl i8 1, [[BITS1:%.*]]
+; CHECK-NEXT: [[T1_HIGHBITS:%.*]] = lshr i8 [[T1]], [[BITS0:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[T1_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits0
+ %t1 = shl i8 1, %bits1
+ %r = icmp ugt i8 %t0, %t1
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %r = icmp ugt i8 %t0, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 2, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 2, %bits ; constant is not 1
+ %r = icmp ugt i8 %t0, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n1_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n1_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 2>, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 2>, %bits ; again, wrong constant
+ %r = icmp ugt <2 x i8> %t0, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n2(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %r = icmp uge i8 %t0, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n3(i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[VAL]], [[T0]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %val = call i8 @gen8()
+ %r = icmp ule i8 %val, %t0 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..b9d03393939
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; (1 << bits) u<= val
+; Should be transformed into:
+; (val l>> bits) != 0
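+; For a concrete sketch with bits = 3 (i8): 1 << 3 = 8, so val = 8 gives
+; 8 u<= 8 (true) and 8 l>> 3 = 1 (nonzero, true), while val = 7 gives
+; 8 u<= 7 (false) and 7 l>> 3 == 0 (false), so both forms agree.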
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %r = icmp ule i8 %t0, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ %r = icmp ule <2 x i8> %t0, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ %r = icmp ule <3 x i8> %t0, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %val = call i8 @gen8()
+ %r = icmp uge i8 %val, %t0 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T1:%.*]] = shl i8 1, [[BITS1:%.*]]
+; CHECK-NEXT: [[T1_HIGHBITS:%.*]] = lshr i8 [[T1]], [[BITS0:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[T1_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits0
+ %t1 = shl i8 1, %bits1
+ %r = icmp ule i8 %t0, %t1
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %r = icmp ule i8 %t0, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 2, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 2, %bits ; constant is not 1
+ %r = icmp ule i8 %t0, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n1_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n1_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 2>, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 2>, %bits ; again, wrong constant
+ %r = icmp ule <2 x i8> %t0, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n2(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T0]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %r = icmp ult i8 %t0, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n3(i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[VAL]], [[T0]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ %val = call i8 @gen8()
+ %r = icmp ugt i8 %val, %t0 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..2de7e432ac2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; ((1 << bits)+(-1)) u< val
+; Should be transformed into:
+; (val l>> bits) != 0
+
+; NOTE: the innermost shl is not one-use. Else canonicalization happens.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef1(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef2(
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits
+ call void @use3i8(<3 x i8> %t0)
+ %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ugt i8 %val, %t1 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS0:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T2:%.*]] = shl i8 1, [[BITS1:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = add i8 [[T2]], -1
+; CHECK-NEXT: [[T3_HIGHBITS:%.*]] = lshr i8 [[T3]], [[BITS0]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[T3_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits0
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %t2 = shl i8 1, %bits1
+ call void @use8(i8 %t2)
+ %t3 = add i8 %t2, -1
+ %r = icmp ult i8 %t1, %t3
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+define i1 @oneuse(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0) ; this is needed anyway
+ %t1 = add i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits ; constant is not 1
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @n1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, 1 ; constant is not -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n2_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 -1>, %bits ; again, wrong constant
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 1>
+; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 1, i8 1>, %bits
+ call void @use2i8(<2 x i8> %t0)
+ %t1 = add <2 x i8> %t0, <i8 -1, i8 1> ; again, wrong constant
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n3(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %r = icmp ule i8 %t1, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n4(i8 %bits) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[VAL]], [[T1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits
+ call void @use8(i8 %t0)
+ %t1 = add i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp uge i8 %val, %t1 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
new file mode 100644
index 00000000000..f97d24374da
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38708
+
+; Pattern:
+; ~(-1 << bits) u< val
+; Should be transformed into:
+; (val l>> bits) != 0
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %bits
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef1(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) {
+; CHECK-LABEL: @p2_vec_undef2(
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
+; CHECK-NEXT: ret <3 x i1> [[R]]
+;
+ %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits
+ %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+ %r = icmp ult <3 x i8> %t1, %val
+ ret <3 x i1> %r
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %bits) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL]], [[BITS:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp ugt i8 %val, %t1 ; swapped order and predicate
+ ret i1 %r
+}
+
+; What if we have the same pattern on both sides?
+define i1 @both(i8 %bits0, i8 %bits1) {
+; CHECK-LABEL: @both(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = shl i8 -1, [[BITS1:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T2]], [[T0]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits0
+ %t1 = xor i8 %t0, -1
+ %t2 = shl i8 -1, %bits1
+ %t3 = xor i8 %t2, -1
+ %r = icmp ult i8 %t1, %t3
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr i8 [[VAL:%.*]], [[BITS]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[VAL_HIGHBITS]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @oneuse1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @oneuse2(i8 %val, i8 %bits) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: call void @use8(i8 [[T0]])
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ call void @use8(i8 %t0)
+ %t1 = xor i8 %t0, -1
+ call void @use8(i8 %t1)
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 1, %bits ; constant is not -1
+ %t1 = xor i8 %t0, -1
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define i1 @n1(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, 1 ; not 'not'
+ %r = icmp ult i8 %t1, %val
+ ret i1 %r
+}
+
+define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n2_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 1>, %bits ; again, wrong constant
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT: [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 1>
+; CHECK-NEXT: [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t0 = shl <2 x i8> <i8 -1, i8 -1>, %bits
+ %t1 = xor <2 x i8> %t0, <i8 -1, i8 1> ; again, wrong constant
+ %r = icmp ult <2 x i8> %t1, %val
+ ret <2 x i1> %r
+}
+
+define i1 @n3(i8 %val, i8 %bits) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[T1]], [[VAL:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %r = icmp ule i8 %t1, %val ; wrong predicate
+ ret i1 %r
+}
+
+define i1 @n4(i8 %bits) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[T0:%.*]] = shl i8 -1, [[BITS:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen8()
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[VAL]], [[T1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %t0 = shl i8 -1, %bits
+ %t1 = xor i8 %t0, -1
+ %val = call i8 @gen8()
+ %r = icmp uge i8 %val, %t1 ; swapped order and [wrong] predicate
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
new file mode 100644
index 00000000000..cb83db5f8fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Canonicalize vector ge/le comparisons with constants to gt/lt.
+
+; Normal types are ConstantDataVectors. Test the constant values adjacent to the
+; min/max values that we're not allowed to transform.
+
+define <2 x i1> @sge(<2 x i8> %x) {
+; CHECK-LABEL: @sge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -128, i8 126>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 -127, i8 -129>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge(<2 x i8> %x) {
+; CHECK-LABEL: @uge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 -2, i8 0>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp uge <2 x i8> %x, <i8 -1, i8 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle(<2 x i8> %x) {
+; CHECK-LABEL: @sle(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 127, i8 -127>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %x, <i8 126, i8 128>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule(<2 x i8> %x) {
+; CHECK-LABEL: @ule(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 -1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i8> %x, <i8 254, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
+; CHECK-LABEL: @ult_min_signed_value(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ult <2 x i8> %x, <i8 128, i8 128>
+ ret <2 x i1> %cmp
+}
+
+; Zeros are special: they're ConstantAggregateZero.
+
+define <2 x i1> @sge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sge_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @uge_zero(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %cmp = icmp uge <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sle_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_zero(<2 x i8> %x) {
+; CHECK-LABEL: @ule_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+; Weird types are ConstantVectors, not ConstantDataVectors. For an i3 type:
+; Signed min = -4
+; Unsigned min = 0
+; Signed max = 3
+; Unsigned max = 7
+
+define <3 x i1> @sge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sge_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i3> [[X:%.*]], <i3 -4, i3 2, i3 -1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp sge <3 x i3> %x, <i3 -3, i3 -5, i3 0>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @uge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @uge_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <3 x i3> [[X:%.*]], <i3 -2, i3 0, i3 1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp uge <3 x i3> %x, <i3 -1, i3 1, i3 2>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @sle_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sle_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i3> [[X:%.*]], <i3 3, i3 -3, i3 1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp sle <3 x i3> %x, <i3 2, i3 4, i3 0>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @ule_weird(<3 x i3> %x) {
+; CHECK-LABEL: @ule_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <3 x i3> [[X:%.*]], <i3 -1, i3 1, i3 2>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp ule <3 x i3> %x, <i3 6, i3 0, i3 1>
+ ret <3 x i1> %cmp
+}
+
+; We can't do the transform if any constants are already at the limits.
+
+define <2 x i1> @sge_min(<2 x i3> %x) {
+; CHECK-LABEL: @sge_min(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i3> [[X:%.*]], <i3 -4, i3 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i3> %x, <i3 -4, i3 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_min(<2 x i3> %x) {
+; CHECK-LABEL: @uge_min(
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <2 x i3> [[X:%.*]], <i3 1, i3 0>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp uge <2 x i3> %x, <i3 1, i3 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle_max(<2 x i3> %x) {
+; CHECK-LABEL: @sle_max(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i3> [[X:%.*]], <i3 1, i3 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i3> %x, <i3 1, i3 3>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_max(<2 x i3> %x) {
+; CHECK-LABEL: @ule_max(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ule <2 x i3> [[X:%.*]], <i3 -1, i3 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i3> %x, <i3 7, i3 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @PR27756_1(<2 x i8> %a) {
+; CHECK-LABEL: @PR27756_1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], <i8 34, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, <i8 bitcast (<2 x i4> <i4 1, i4 2> to i8), i8 0>
+ ret <2 x i1> %cmp
+}
+
+; Undef elements don't prevent the transform of the comparison.
+
+define <2 x i1> @PR27756_2(<2 x i8> %a) {
+; CHECK-LABEL: @PR27756_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], <i8 undef, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, <i8 undef, i8 0>
+ ret <2 x i1> %cmp
+}
+
+@someglobal = global i32 0
+
+define <2 x i1> @PR27786(<2 x i8> %a) {
+; CHECK-LABEL: @PR27786(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i8> [[A:%.*]], bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>)
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>)
+ ret <2 x i1> %cmp
+}
+
+; This is similar to a transform for shuffled binops: compare first, shuffle after.
+
+define <4 x i1> @same_shuffle_inputs_icmp(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @same_shuffle_inputs_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %shufx = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> < i32 3, i32 3, i32 2, i32 0 >
+ %shufy = shufflevector <4 x i8> %y, <4 x i8> undef, <4 x i32> < i32 3, i32 3, i32 2, i32 0 >
+ %cmp = icmp sgt <4 x i8> %shufx, %shufy
+ ret <4 x i1> %cmp
+}
+
+; fcmp and size-changing shuffles are ok too.
+
+define <5 x i1> @same_shuffle_inputs_fcmp(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @same_shuffle_inputs_fcmp(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <4 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <5 x i32> <i32 0, i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT: ret <5 x i1> [[CMP]]
+;
+ %shufx = shufflevector <4 x float> %x, <4 x float> undef, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 >
+ %shufy = shufflevector <4 x float> %y, <4 x float> undef, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 >
+ %cmp = fcmp oeq <5 x float> %shufx, %shufy
+ ret <5 x i1> %cmp
+}
+
+declare void @use_v4i8(<4 x i8>)
+
+define <4 x i1> @same_shuffle_inputs_icmp_extra_use1(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use1(
+; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SHUFX]])
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %shufx = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
+ %shufy = shufflevector <4 x i8> %y, <4 x i8> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
+ %cmp = icmp ugt <4 x i8> %shufx, %shufy
+ call void @use_v4i8(<4 x i8> %shufx)
+ ret <4 x i1> %cmp
+}
+
+declare void @use_v2i8(<2 x i8>)
+
+define <2 x i1> @same_shuffle_inputs_icmp_extra_use2(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use2(
+; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]])
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shufx = shufflevector <4 x i8> %x, <4 x i8> undef, <2 x i32> < i32 3, i32 2 >
+ %shufy = shufflevector <4 x i8> %y, <4 x i8> undef, <2 x i32> < i32 3, i32 2 >
+ %cmp = icmp eq <2 x i8> %shufx, %shufy
+ call void @use_v2i8(<2 x i8> %shufy)
+ ret <2 x i1> %cmp
+}
+
+; Negative test: if both shuffles have extra uses, don't transform because that would increase instruction count.
+
+define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use3(
+; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SHUFX]], [[SHUFY]]
+; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFX]])
+; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]])
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shufx = shufflevector <4 x i8> %x, <4 x i8> undef, <2 x i32> < i32 0, i32 0 >
+ %shufy = shufflevector <4 x i8> %y, <4 x i8> undef, <2 x i32> < i32 0, i32 0 >
+ %cmp = icmp eq <2 x i8> %shufx, %shufy
+ call void @use_v2i8(<2 x i8> %shufx)
+ call void @use_v2i8(<2 x i8> %shufy)
+ ret <2 x i1> %cmp
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll
new file mode 100644
index 00000000000..dab9b5e9fef
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+
+define i1 @slt_to_ult(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ult(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %b = xor i8 %y, 128
+ %cmp = icmp slt i8 %a, %b
+ ret i1 %cmp
+}
+
+; PR33138 - https://bugs.llvm.org/show_bug.cgi?id=33138
+
+define <2 x i1> @slt_to_ult_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ult_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %b = xor <2 x i8> %y, <i8 128, i8 128>
+ %cmp = icmp slt <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_slt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %b = xor i8 %y, 128
+ %cmp = icmp ult i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_slt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_slt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %b = xor <2 x i8> %y, <i8 128, i8 128>
+ %cmp = icmp ult <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
+
+define i1 @slt_to_ugt(i8 %x, i8 %y) {
+; CHECK-LABEL: @slt_to_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %b = xor i8 %y, 127
+ %cmp = icmp slt i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @slt_to_ugt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @slt_to_ugt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %b = xor <2 x i8> %y, <i8 127, i8 127>
+ %cmp = icmp slt <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @ult_to_sgt(i8 %x, i8 %y) {
+; CHECK-LABEL: @ult_to_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %b = xor i8 %y, 127
+ %cmp = icmp ult i8 %a, %b
+ ret i1 %cmp
+}
+
+define <2 x i1> @ult_to_sgt_splat(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @ult_to_sgt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, %y
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %b = xor <2 x i8> %y, <i8 127, i8 127>
+ %cmp = icmp ult <2 x i8> %a, %b
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ signmask), C --> icmp s/u a, C'
+
+define i1 @sge_to_ugt(i8 %x) {
+; CHECK-LABEL: @sge_to_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 %x, -114
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %cmp = icmp sge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ugt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_sgt(i8 %x) {
+; CHECK-LABEL: @uge_to_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 %x, -114
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 128
+ %cmp = icmp uge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_sgt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -114, i8 -114>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 128, i8 128>
+ %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; icmp u/s (a ^ maxsignval), C --> icmp s/u' a, C'
+
+define i1 @sge_to_ult(i8 %x) {
+; CHECK-LABEL: @sge_to_ult(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 %x, 113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %cmp = icmp sge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) {
+; CHECK-LABEL: @sge_to_ult_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %cmp = icmp sge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; Make sure that unsigned -> signed works too.
+
+define i1 @uge_to_slt(i8 %x) {
+; CHECK-LABEL: @uge_to_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a = xor i8 %x, 127
+ %cmp = icmp uge i8 %a, 15
+ ret i1 %cmp
+}
+
+define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) {
+; CHECK-LABEL: @uge_to_slt_splat(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 113, i8 113>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a = xor <2 x i8> %x, <i8 127, i8 127>
+ %cmp = icmp uge <2 x i8> %a, <i8 15, i8 15>
+ ret <2 x i1> %cmp
+}
+
+; PR33138, part 2: https://bugs.llvm.org/show_bug.cgi?id=33138
+; Bitcast canonicalization ensures that we recognize the signbit constant.
+
+define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @sgt_to_ugt_bitcasted_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> %x to <8 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> %y to <8 x i8>
+; CHECK-NEXT: [[E:%.*]] = icmp ugt <8 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <8 x i1> [[E]]
+;
+ %a = xor <2 x i32> %x, <i32 2155905152, i32 2155905152> ; 0x80808080
+ %b = xor <2 x i32> %y, <i32 2155905152, i32 2155905152>
+ %c = bitcast <2 x i32> %a to <8 x i8>
+ %d = bitcast <2 x i32> %b to <8 x i8>
+ %e = icmp sgt <8 x i8> %c, %d
+ ret <8 x i1> %e
+}
+
+; Bitcast canonicalization ensures that we recognize the signbit constant.
+
+define <2 x i1> @negative_simplify_splat(<4 x i8> %x) {
+; CHECK-LABEL: @negative_simplify_splat(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128>
+ %b = bitcast <4 x i8> %a to <2 x i16>
+ %c = icmp sgt <2 x i16> %b, zeroinitializer
+ ret <2 x i1> %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
new file mode 100644
index 00000000000..3fecf97e89d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -0,0 +1,3477 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @test1(i32 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %a = icmp slt i32 %X, 0
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+define <2 x i32> @test1vec(<2 x i32> %X) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[X_LOBIT]]
+;
+ %a = icmp slt <2 x i32> %X, zeroinitializer
+ %b = zext <2 x i1> %a to <2 x i32>
+ ret <2 x i32> %b
+}
+
+define i32 @test2(i32 %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor i32 [[X_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[X_LOBIT_NOT]]
+;
+ %a = icmp ult i32 %X, -2147483648
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+define <2 x i32> @test2vec(<2 x i32> %X) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor <2 x i32> [[X_LOBIT]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[X_LOBIT_NOT]]
+;
+ %a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
+ %b = zext <2 x i1> %a to <2 x i32>
+ ret <2 x i32> %b
+}
+
+define i32 @test3(i32 %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %a = icmp slt i32 %X, 0
+ %b = sext i1 %a to i32
+ ret i32 %b
+}
+
+define i32 @test4(i32 %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor i32 [[X_LOBIT]], -1
+; CHECK-NEXT: ret i32 [[X_LOBIT_NOT]]
+;
+ %a = icmp ult i32 %X, -2147483648
+ %b = sext i1 %a to i32
+ ret i32 %b
+}
+
+; PR4837
+define <2 x i1> @test5_eq(<2 x i64> %x) {
+; CHECK-LABEL: @test5_eq(
+; CHECK-NEXT: ret <2 x i1> undef
+;
+ %V = icmp eq <2 x i64> %x, undef
+ ret <2 x i1> %V
+}
+define <2 x i1> @test5_ne(<2 x i64> %x) {
+; CHECK-LABEL: @test5_ne(
+; CHECK-NEXT: ret <2 x i1> undef
+;
+ %V = icmp ne <2 x i64> %x, undef
+ ret <2 x i1> %V
+}
+define <2 x i1> @test5_ugt(<2 x i64> %x) {
+; CHECK-LABEL: @test5_ugt(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %V = icmp ugt <2 x i64> %x, undef
+ ret <2 x i1> %V
+}
+define <2 x i1> @test5_zero() {
+; CHECK-LABEL: @test5_zero(
+; CHECK-NEXT: ret <2 x i1> undef
+;
+ %V = icmp eq <2 x i64> zeroinitializer, undef
+ ret <2 x i1> %V
+}
+
+define i32 @test6(i32 %a, i32 %b) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[E:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[F:%.*]] = and i32 [[E]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %c = icmp sle i32 %a, -1
+ %d = zext i1 %c to i32
+ %e = sub i32 0, %d
+ %f = and i32 %e, %b
+ ret i32 %f
+}
+
+
+define i1 @test7(i32 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = add i32 %x, -1
+ %b = icmp ult i32 %a, %x
+ ret i1 %b
+}
+
+define <2 x i1> @test7_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test7_vec(
+; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %a = add <2 x i32> %x, <i32 -1, i32 -1>
+ %b = icmp ult <2 x i32> %a, %x
+ ret <2 x i1> %b
+}
+
+define i1 @test8(i32 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i1 false
+;
+ %a = add i32 %x, -1
+ %b = icmp eq i32 %a, %x
+ ret i1 %b
+}
+
+define <2 x i1> @test8_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test8_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %a = add <2 x i32> %x, <i32 -1, i32 -1>
+ %b = icmp eq <2 x i32> %a, %x
+ ret <2 x i1> %b
+}
+
+define i1 @test9(i32 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = add i32 %x, -2
+ %b = icmp ugt i32 %x, %a
+ ret i1 %b
+}
+
+define <2 x i1> @test9_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test9_vec(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %a = add <2 x i32> %x, <i32 -2, i32 -2>
+ %b = icmp ugt <2 x i32> %x, %a
+ ret <2 x i1> %b
+}
+
+define i1 @test9b(i32 %x) {
+; CHECK-LABEL: @test9b(
+; CHECK-NEXT: [[B:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = add i32 %x, -2
+ %b = icmp ugt i32 %a, %x
+ ret i1 %b
+}
+
+define <2 x i1> @test9b_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test9b_vec(
+; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %a = add <2 x i32> %x, <i32 -2, i32 -2>
+ %b = icmp ugt <2 x i32> %a, %x
+ ret <2 x i1> %b
+}
+
+define i1 @test10(i32 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = add i32 %x, -1
+ %b = icmp slt i32 %a, %x
+ ret i1 %b
+}
+
+define <2 x i1> @test10_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test10_vec(
+; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %a = add <2 x i32> %x, <i32 -1, i32 -1>
+ %b = icmp slt <2 x i32> %a, %x
+ ret <2 x i1> %b
+}
+
+define i1 @test10b(i32 %x) {
+; CHECK-LABEL: @test10b(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = add i32 %x, -1
+ %b = icmp sgt i32 %a, %x
+ ret i1 %b
+}
+
+define <2 x i1> @test10b_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test10b_vec(
+; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %a = add <2 x i32> %x, <i32 -1, i32 -1>
+ %b = icmp sgt <2 x i32> %a, %x
+ ret <2 x i1> %b
+}
+
+define i1 @test11(i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i1 true
+;
+ %a = add nsw i32 %x, 8
+ %b = icmp slt i32 %x, %a
+ ret i1 %b
+}
+
+define <2 x i1> @test11_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test11_vec(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %a = add nsw <2 x i32> %x, <i32 8, i32 8>
+ %b = icmp slt <2 x i32> %x, %a
+ ret <2 x i1> %b
+}
+
+; PR6195
+define i1 @test12(i1 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[NOT_A:%.*]] = xor i1 [[A:%.*]], true
+; CHECK-NEXT: ret i1 [[NOT_A]]
+;
+ %S = select i1 %A, i64 -4294967295, i64 8589934591
+ %B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
+ ret i1 %B
+}
+
+; PR6481
+define i1 @test13(i8 %X) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i8 undef, %X
+ ret i1 %cmp
+}
+
+define i1 @test14(i8 %X) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i8 undef, -128
+ ret i1 %cmp
+}
+
+define i1 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: ret i1 undef
+;
+ %cmp = icmp eq i8 undef, -128
+ ret i1 %cmp
+}
+
+define i1 @test16() {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i1 undef
+;
+ %cmp = icmp ne i8 undef, -128
+ ret i1 %cmp
+}
+
+define i1 @test17(i32 %x) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 8
+ %cmp = icmp eq i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test17vec(<2 x i32> %x) {
+; CHECK-LABEL: @test17vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %x
+ %and = and <2 x i32> %shl, <i32 8, i32 8>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test17a(i32 %x) {
+; CHECK-LABEL: @test17a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 7
+ %cmp = icmp eq i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test17a_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test17a_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %x
+ %and = and <2 x i32> %shl, <i32 7, i32 7>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test18_eq(i32 %x) {
+; CHECK-LABEL: @test18_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sh = lshr i32 8, %x
+ %and = and i32 %sh, 1
+ %cmp = icmp eq i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test18_eq_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test18_eq_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sh = lshr <2 x i32> <i32 8, i32 8>, %x
+ %and = and <2 x i32> %sh, <i32 1, i32 1>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test18_ne(i32 %x) {
+; CHECK-LABEL: @test18_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sh = lshr i32 8, %x
+ %and = and i32 %sh, 1
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test18_ne_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test18_ne_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sh = lshr <2 x i32> <i32 8, i32 8>, %x
+ %and = and <2 x i32> %sh, <i32 1, i32 1>
+ %cmp = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test19(i32 %x) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 8
+ %cmp = icmp eq i32 %and, 8
+ ret i1 %cmp
+}
+
+define <2 x i1> @test19vec(<2 x i32> %x) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %x
+ %and = and <2 x i32> %shl, <i32 8, i32 8>
+ %cmp = icmp eq <2 x i32> %and, <i32 8, i32 8>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @cmp_and_signbit_vec(<2 x i3> %x) {
+; CHECK-LABEL: @cmp_and_signbit_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i3> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i3> %x, <i3 4, i3 4>
+ %cmp = icmp ne <2 x i3> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 8
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test20vec(<2 x i32> %x) {
+; CHECK-LABEL: @test20vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %x
+ %and = and <2 x i32> %shl, <i32 8, i32 8>
+ %cmp = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test20a(i32 %x) {
+; CHECK-LABEL: @test20a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %x
+ %and = and i32 %shl, 7
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test20a_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test20a_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %x
+ %and = and <2 x i32> %shl, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @test21(i8 %x, i8 %y) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = or i8 %x, 1
+ %B = icmp ugt i8 %A, 3
+ ret i1 %B
+}
+
+define i1 @test22(i8 %x, i8 %y) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = or i8 %x, 1
+ %B = icmp ult i8 %A, 4
+ ret i1 %B
+}
+
+; PR2740
+define i1 @test23(i32 %x) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[I4:%.*]] = icmp sgt i32 [[X:%.*]], 1328634634
+; CHECK-NEXT: ret i1 [[I4]]
+;
+ %i3 = sdiv i32 %x, -1328634635
+ %i4 = icmp eq i32 %i3, -1
+ ret i1 %i4
+}
+
+define <2 x i1> @test23vec(<2 x i32> %x) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 1328634634, i32 1328634634>
+; CHECK-NEXT: ret <2 x i1> [[I4]]
+;
+ %i3 = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635>
+ %i4 = icmp eq <2 x i32> %i3, <i32 -1, i32 -1>
+ ret <2 x i1> %i4
+}
+
+@X = global [1000 x i32] zeroinitializer
+
+; PR8882
+define i1 @test24(i64 %i) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[I:%.*]], 1000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %p1 = getelementptr inbounds i32, i32* getelementptr inbounds ([1000 x i32], [1000 x i32]* @X, i64 0, i64 0), i64 %i
+ %cmp = icmp eq i32* %p1, getelementptr inbounds ([1000 x i32], [1000 x i32]* @X, i64 1, i64 0)
+ ret i1 %cmp
+}
+
+@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer
+
+define i1 @test24_as1(i64 %i) {
+; CHECK-LABEL: @test24_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TMP1]], 1000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %p1 = getelementptr inbounds i32, i32 addrspace(1)* getelementptr inbounds ([1000 x i32], [1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
+ %cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32], [1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0)
+ ret i1 %cmp
+}
+
+define i1 @test25(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = add nsw i32 %x, %z
+ %rhs = add nsw i32 %y, %z
+ %c = icmp sgt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; X + Z > Y + Z -> X > Y if there is no overflow.
+define i1 @test26(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = add nuw i32 %x, %z
+ %rhs = add nuw i32 %y, %z
+ %c = icmp ugt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; X - Z > Y - Z -> X > Y if there is no overflow.
+define i1 @test27(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = sub nsw i32 %x, %z
+ %rhs = sub nsw i32 %y, %z
+ %c = icmp sgt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; X - Z > Y - Z -> X > Y if there is no overflow.
+define i1 @test28(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = sub nuw i32 %x, %z
+ %rhs = sub nuw i32 %y, %z
+ %c = icmp ugt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; X + Y > X -> Y > 0 if there is no overflow.
+define i1 @test29(i32 %x, i32 %y) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = add nsw i32 %x, %y
+ %c = icmp sgt i32 %lhs, %x
+ ret i1 %c
+}
+
+; X + Y > X -> Y > 0 if there is no overflow.
+define i1 @test30(i32 %x, i32 %y) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = add nuw i32 %x, %y
+ %c = icmp ugt i32 %lhs, %x
+ ret i1 %c
+}
+
+; X > X + Y -> 0 > Y if there is no overflow.
+define i1 @test31(i32 %x, i32 %y) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %rhs = add nsw i32 %x, %y
+ %c = icmp sgt i32 %x, %rhs
+ ret i1 %c
+}
+
+; X > X + Y -> 0 > Y if there is no overflow.
+define i1 @test32(i32 %x, i32 %y) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: ret i1 false
+;
+ %rhs = add nuw i32 %x, %y
+ %c = icmp ugt i32 %x, %rhs
+ ret i1 %c
+}
+
+; X - Y > X -> 0 > Y if there is no overflow.
+define i1 @test33(i32 %x, i32 %y) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = sub nsw i32 %x, %y
+ %c = icmp sgt i32 %lhs, %x
+ ret i1 %c
+}
+
+; X - Y > X -> 0 > Y if there is no overflow.
+define i1 @test34(i32 %x, i32 %y) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i1 false
+;
+ %lhs = sub nuw i32 %x, %y
+ %c = icmp ugt i32 %lhs, %x
+ ret i1 %c
+}
+
+; X > X - Y -> Y > 0 if there is no overflow.
+define i1 @test35(i32 %x, i32 %y) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %rhs = sub nsw i32 %x, %y
+ %c = icmp sgt i32 %x, %rhs
+ ret i1 %c
+}
+
+; X > X - Y -> Y > 0 if there is no overflow.
+define i1 @test36(i32 %x, i32 %y) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %rhs = sub nuw i32 %x, %y
+ %c = icmp ugt i32 %x, %rhs
+ ret i1 %c
+}
+
+; PR36969 - https://bugs.llvm.org/show_bug.cgi?id=36969
+
+define i1 @ugt_sub(i32 %xsrc, i32 %y) {
+; CHECK-LABEL: @ugt_sub(
+; CHECK-NEXT: [[X:%.*]] = udiv i32 [[XSRC:%.*]], 42
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x = udiv i32 %xsrc, 42 ; thwart complexity-based canonicalization
+ %sub = sub i32 %x, %y
+ %cmp = icmp ugt i32 %sub, %x
+ ret i1 %cmp
+}
+
+; Swap operands and predicate. Try a vector type to verify that works too.
+
+define <2 x i1> @ult_sub(<2 x i8> %xsrc, <2 x i8> %y) {
+; CHECK-LABEL: @ult_sub(
+; CHECK-NEXT: [[X:%.*]] = udiv <2 x i8> [[XSRC:%.*]], <i8 42, i8 -42>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %x = udiv <2 x i8> %xsrc, <i8 42, i8 -42> ; thwart complexity-based canonicalization
+ %sub = sub <2 x i8> %x, %y
+ %cmp = icmp ult <2 x i8> %x, %sub
+ ret <2 x i1> %cmp
+}
+
+; X - Y > X - Z -> Z > Y if there is no overflow.
+define i1 @test37(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = sub nsw i32 %x, %y
+ %rhs = sub nsw i32 %x, %z
+ %c = icmp sgt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; X - Y > X - Z -> Z > Y if there is no overflow.
+define i1 @test38(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %lhs = sub nuw i32 %x, %y
+ %rhs = sub nuw i32 %x, %z
+ %c = icmp ugt i32 %lhs, %rhs
+ ret i1 %c
+}
+
+; PR9343 #1
+define i1 @test39(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = ashr exact i32 %X, %Y
+ %B = icmp eq i32 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @test39vec(<2 x i32> %X, <2 x i32> %Y) {
+; CHECK-LABEL: @test39vec(
+; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %A = ashr exact <2 x i32> %X, %Y
+ %B = icmp eq <2 x i32> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @test40(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = lshr exact i32 %X, %Y
+ %B = icmp ne i32 %A, 0
+ ret i1 %B
+}
+
+define <2 x i1> @test40vec(<2 x i32> %X, <2 x i32> %Y) {
+; CHECK-LABEL: @test40vec(
+; CHECK-NEXT: [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[B]]
+;
+ %A = lshr exact <2 x i32> %X, %Y
+ %B = icmp ne <2 x i32> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define i1 @shr_exact(i132 %x) {
+; CHECK-LABEL: @shr_exact(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i132 [[X:%.*]], 32
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sh = ashr exact i132 %x, 4
+ %cmp = icmp eq i132 %sh, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @shr_exact_vec(<2 x i132> %x) {
+; CHECK-LABEL: @shr_exact_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i132> [[X:%.*]], <i132 32, i132 32>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sh = lshr exact <2 x i132> %x, <i132 4, i132 4>
+ %cmp = icmp ne <2 x i132> %sh, <i132 2, i132 2>
+ ret <2 x i1> %cmp
+}
+
+; PR9343 #3
+define i1 @test41(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i1 true
+;
+ %A = urem i32 %X, %Y
+ %B = icmp ugt i32 %Y, %A
+ ret i1 %B
+}
+
+define i1 @test42(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[Y:%.*]], -1
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = srem i32 %X, %Y
+ %B = icmp slt i32 %A, %Y
+ ret i1 %B
+}
+
+define i1 @test43(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = srem i32 %X, %Y
+ %B = icmp slt i32 %Y, %A
+ ret i1 %B
+}
+
+define i1 @test44(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[Y:%.*]], -1
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = srem i32 %X, %Y
+ %B = icmp slt i32 %A, %Y
+ ret i1 %B
+}
+
+define i1 @test45(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = srem i32 %X, %Y
+ %B = icmp slt i32 %Y, %A
+ ret i1 %B
+}
+
+; PR9343 #4
+define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = ashr exact i32 %X, %Z
+ %B = ashr exact i32 %Y, %Z
+ %C = icmp ult i32 %A, %B
+ ret i1 %C
+}
+
+; PR9343 #5
+define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = ashr exact i32 %X, %Z
+ %B = ashr exact i32 %Y, %Z
+ %C = icmp ugt i32 %A, %B
+ ret i1 %C
+}
+
+; PR9343 #8
+define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = sdiv exact i32 %X, %Z
+ %B = sdiv exact i32 %Y, %Z
+ %C = icmp eq i32 %A, %B
+ ret i1 %C
+}
+
+; The above transform only works for equality predicates.
+
+define i1 @PR32949(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @PR32949(
+; CHECK-NEXT: [[A:%.*]] = sdiv exact i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[B:%.*]] = sdiv exact i32 [[Y:%.*]], [[Z]]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = sdiv exact i32 %X, %Z
+ %B = sdiv exact i32 %Y, %Z
+ %C = icmp sgt i32 %A, %B
+ ret i1 %C
+}
+
+; PR8469
+define <2 x i1> @test49(<2 x i32> %tmp3) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+entry:
+ %tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
+ %cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
+ ret <2 x i1> %cmp
+}
+
+; PR9343 #7
+define i1 @test50(i16 %X, i32 %Y) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: ret i1 true
+;
+ %A = zext i16 %X to i32
+ %B = srem i32 %A, %Y
+ %C = icmp sgt i32 %B, -1
+ ret i1 %C
+}
+
+define i1 @test51(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[B:%.*]] = srem i32 [[A]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[B]], -1
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = and i32 %X, 2147483648
+ %B = srem i32 %A, %Y
+ %C = icmp sgt i32 %B, -1
+ ret i1 %C
+}
+
+define i1 @test52(i32 %x1) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X1:%.*]], 16711935
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 4980863
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %conv = and i32 %x1, 255
+ %cmp = icmp eq i32 %conv, 127
+ %tmp2 = lshr i32 %x1, 16
+ %tmp3 = trunc i32 %tmp2 to i8
+ %cmp15 = icmp eq i8 %tmp3, 76
+
+ %A = and i1 %cmp, %cmp15
+ ret i1 %A
+}
+
+define i1 @test52b(i128 %x1) {
+; CHECK-LABEL: @test52b(
+; CHECK-NEXT: [[TMP1:%.*]] = and i128 [[X1:%.*]], 16711935
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i128 [[TMP1]], 4980863
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %conv = and i128 %x1, 255
+ %cmp = icmp eq i128 %conv, 127
+ %tmp2 = lshr i128 %x1, 16
+ %tmp3 = trunc i128 %tmp2 to i8
+ %cmp15 = icmp eq i8 %tmp3, 76
+
+ %A = and i1 %cmp, %cmp15
+ ret i1 %A
+}
+
+; PR9838
+define i1 @test53(i32 %a, i32 %b) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[X:%.*]] = sdiv exact i32 [[A:%.*]], 30
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[B:%.*]], 30
+; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = sdiv exact i32 %a, 30
+ %y = sdiv i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
+
+define i1 @test54(i8 %a) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[A:%.*]], -64
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], -128
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %ext = zext i8 %a to i32
+ %and = and i32 %ext, 192
+ %ret = icmp eq i32 %and, 128
+ ret i1 %ret
+}
+
+define i1 @test55(i32 %a) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -123
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = sub i32 0, %a
+ %cmp = icmp eq i32 %sub, 123
+ ret i1 %cmp
+}
+
+define <2 x i1> @test55vec(<2 x i32> %a) {
+; CHECK-LABEL: @test55vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -123, i32 -123>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sub = sub <2 x i32> zeroinitializer, %a
+ %cmp = icmp eq <2 x i32> %sub, <i32 123, i32 123>
+ ret <2 x i1> %cmp
+}
+
+define i1 @test56(i32 %a) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], -113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = sub i32 10, %a
+ %cmp = icmp eq i32 %sub, 123
+ ret i1 %cmp
+}
+
+define <2 x i1> @test56vec(<2 x i32> %a) {
+; CHECK-LABEL: @test56vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -113, i32 -113>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %sub = sub <2 x i32> <i32 10, i32 10>, %a
+ %cmp = icmp eq <2 x i32> %sub, <i32 123, i32 123>
+ ret <2 x i1> %cmp
+}
+
+; PR10267: Don't make icmps more expensive when no other instruction is subsumed.
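+; The 'and' result is also passed to @foo, so it cannot be removed; rewriting the
+; compare in terms of %a alone would not eliminate any instruction.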
+declare void @foo(i32)
+define i1 @test57(i32 %a) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: call void @foo(i32 [[AND]])
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %a, -2
+ %cmp = icmp ne i32 %and, 0
+ call void @foo(i32 %and)
+ ret i1 %cmp
+}
+
+; rdar://problem/10482509
+define zeroext i1 @cmpabs1(i64 %val) {
+; CHECK-LABEL: @cmpabs1(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i64 [[VAL:%.*]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %sub, i64 %val
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
+
+define zeroext i1 @cmpabs2(i64 %val) {
+; CHECK-LABEL: @cmpabs2(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i64 [[VAL:%.*]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %val, i64 %sub
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
+
+define void @test58() {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 36029346783166592)
+; CHECK-NEXT: ret void
+;
+ %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
+ %call = call i32 @test58_d( i64 %cast)
+ ret void
+}
+declare i32 @test58_d(i64)
+
+define i1 @test59(i8* %foo) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8* [[FOO:%.*]], i64 8
+; CHECK-NEXT: [[USE:%.*]] = ptrtoint i8* [[GEP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
+; CHECK-NEXT: ret i1 true
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32, i32* %bit, i64 2
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 10
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ %use = ptrtoint i8* %cast1 to i64
+ %call = call i32 @test58_d(i64 %use)
+ ret i1 %cmp
+}
+
+define i1 @test59_as1(i8 addrspace(1)* %foo) {
+; CHECK-LABEL: @test59_as1(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[FOO:%.*]], i16 8
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP1]] to i16
+; CHECK-NEXT: [[USE:%.*]] = zext i16 [[TMP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
+; CHECK-NEXT: ret i1 true
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 2
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 10
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ %use = ptrtoint i8 addrspace(1)* %cast1 to i64
+ %call = call i32 @test58_d(i64 %use)
+ ret i1 %cmp
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[J:%.*]] to i16
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 %j
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+}
+
+; Same as test60, but look through an addrspacecast instead of a
+; bitcast. This uses an address space with the same pointer size.
+define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %bit = addrspacecast i8* %foo to i32 addrspace(3)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(3)* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast_smaller(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %bit = addrspacecast i8* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) {
+; CHECK-LABEL: @test60_addrspacecast_larger(
+; CHECK-NEXT: [[I_TR:%.*]] = trunc i32 [[I:%.*]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[I_TR]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(2)* %bit, i32 %i
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j
+ %cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8* [[FOO:%.*]] to i32*
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 [[I:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[FOO]], i64 [[J:%.*]]
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8* [[GEP2]], [[CAST1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr i32, i32* %bit, i64 %i
+ %gep2 = getelementptr i8, i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+}
+
+define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test61_as1(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8 addrspace(1)* [[FOO:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 [[I:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* [[FOO]], i16 [[J:%.*]]
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 addrspace(1)* [[GEP2]], [[CAST1]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
+ %cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
+ %cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
+ ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+}
+
+define i1 @test62(i8* %a) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: ret i1 true
+;
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10
+ %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+ ret i1 %cmp
+}
+
+define i1 @test62_as1(i8 addrspace(1)* %a) {
+; CHECK-LABEL: @test62_as1(
+; CHECK-NEXT: ret i1 true
+;
+ %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1
+ %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10
+ %cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
+ ret i1 %cmp
+}
+
+define i1 @test63(i8 %a, i32 %b) {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %z = zext i8 %a to i32
+ %t = and i32 %b, 255
+ %c = icmp eq i32 %z, %t
+ ret i1 %c
+}
+
+define i1 @test64(i8 %a, i32 %b) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %t = and i32 %b, 255
+ %z = zext i8 %a to i32
+ %c = icmp eq i32 %t, %z
+ ret i1 %c
+}
+
+define i1 @test65(i64 %A, i64 %B) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret i1 true
+;
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %A, %B
+ %cmp = icmp eq i64 %s1, %s2
+ ret i1 %cmp
+}
+
+define i1 @test66(i64 %A, i64 %B) {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: ret i1 true
+;
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %B, %A
+ %cmp = icmp eq i64 %s1, %s2
+ ret i1 %cmp
+}
+
+define i1 @test67(i32 %x) {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 96
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %x, 127
+ %cmp = icmp sgt i32 %and, 31
+ ret i1 %cmp
+}
+
+define i1 @test67inverse(i32 %x) {
+; CHECK-LABEL: @test67inverse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 96
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %x, 127
+ %cmp = icmp sle i32 %and, 31
+ ret i1 %cmp
+}
+
+; The test above relies on 3 different folds.
+; This test only checks the last of those (icmp ugt -> icmp ne).
+
+define <2 x i1> @test67vec(<2 x i32> %x) {
+; CHECK-LABEL: @test67vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i32> %x, <i32 96, i32 96>
+ %cmp = icmp ugt <2 x i32> %and, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @test67vec2(<2 x i32> %x) {
+; CHECK-LABEL: @test67vec2(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i32> %x, <i32 127, i32 127>
+ %cmp = icmp ugt <2 x i32> %and, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @test67vecinverse(<2 x i32> %x) {
+; CHECK-LABEL: @test67vecinverse(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i32> %x, <i32 96, i32 96>
+ %cmp = icmp sle <2 x i32> %and, <i32 31, i32 31>
+ ret <2 x i1> %cmp
+}
+
+define i1 @test68(i32 %x) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 127
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[AND]], 30
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %x, 127
+ %cmp = icmp sgt i32 %and, 30
+ ret i1 %cmp
+}
+
+; PR15940
+define i1 @test70(i32 %X) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[A:%.*]] = srem i32 5, [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A]], 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = srem i32 5, %X
+ %B = add i32 %A, 2
+ %C = icmp ne i32 %B, 4
+ ret i1 %C
+}
+
+define <2 x i1> @test70vec(<2 x i32> %X) {
+; CHECK-LABEL: @test70vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = add <2 x i32> %X, <i32 2, i32 2>
+ %C = icmp ne <2 x i32> %B, <i32 4, i32 4>
+ ret <2 x i1> %C
+}
+
+define i1 @icmp_sext16trunc(i32 %x) {
+; CHECK-LABEL: @icmp_sext16trunc(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %trunc = trunc i32 %x to i16
+ %sext = sext i16 %trunc to i32
+ %cmp = icmp slt i32 %sext, 36
+ ret i1 %cmp
+}
+
+define i1 @icmp_sext8trunc(i32 %x) {
+; CHECK-LABEL: @icmp_sext8trunc(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %trunc = trunc i32 %x to i8
+ %sext = sext i8 %trunc to i32
+ %cmp = icmp slt i32 %sext, 36
+ ret i1 %cmp
+}
+
+; Vectors should fold the same way.
+define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_sext8trunc_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %trunc = trunc <2 x i32> %x to <2 x i8>
+ %sext = sext <2 x i8> %trunc to <2 x i32>
+ %cmp = icmp slt <2 x i32> %sext, <i32 36, i32 36>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl16(i32 %x) {
+; CHECK-LABEL: @icmp_shl16(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 %x, 16
+ %cmp = icmp slt i32 %shl, 2359296
+ ret i1 %cmp
+}
+
+; D25952: Don't create illegal types like i15 in InstCombine
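+; (after a shift by 17 only 15 bits of %x remain meaningful, so the analogous fold
+; would need an i15 compare; it is skipped rather than create that illegal type)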
+
+define i1 @icmp_shl17(i32 %x) {
+; CHECK-LABEL: @icmp_shl17(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 17
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2359296
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 %x, 17
+ %cmp = icmp slt i32 %shl, 2359296
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl16_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> %x, <i32 16, i32 16>
+ %cmp = icmp slt <2 x i32> %shl, <i32 2359296, i32 2359296>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl24(i32 %x) {
+; CHECK-LABEL: @icmp_shl24(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 %x, 24
+ %cmp = icmp slt i32 %shl, 603979776
+ ret i1 %cmp
+}
+
+define i1 @icmp_shl_eq(i32 %x) {
+; CHECK-LABEL: @icmp_shl_eq(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and i32 [[X:%.*]], 134217727
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL_MASK]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = shl i32 %x, 5
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_eq_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_eq_vec(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[MUL_MASK]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = shl <2 x i32> %x, <i32 5, i32 5>
+ %cmp = icmp eq <2 x i32> %mul, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_nsw_ne(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = shl nsw i32 %x, 7
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_nsw_ne_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_nsw_ne_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = shl nsw <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %mul, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_ne(i32 %x) {
+; CHECK-LABEL: @icmp_shl_ne(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and i32 [[X:%.*]], 33554431
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MUL_MASK]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = shl i32 %x, 7
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_ne_vec(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 33554431, i32 33554431>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[MUL_MASK]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = shl <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %mul, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @icmp_shl_nuw_ne_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_shl_nuw_ne_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl nuw <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %shl, <i32 256, i32 256>
+ ret <2 x i1> %cmp
+}
+
+; If (mul x, C) preserves the sign and this is a sign test,
+; compare the LHS operand instead.
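+; e.g. with nsw and C = 12 > 0, (x * 12) sgt 0 holds exactly when x sgt 0.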
+define i1 @icmp_mul_nsw(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp sgt i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul_nsw1(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp sle i32 %mul, -1
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul_nsw_neg(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_neg(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp sge i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul_nsw_neg1(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_neg1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp sge i32 %mul, 1
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_mul_nsw_neg1_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_mul_nsw_neg1_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = mul nsw <2 x i32> %x, <i32 -12, i32 -12>
+ %cmp = icmp sge <2 x i32> %mul, <i32 1, i32 1>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_mul_nsw_0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_0(
+; CHECK-NEXT: ret i1 false
+;
+ %mul = mul nsw i32 %x, 0
+ %cmp = icmp sgt i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul(i32 %x) {
+; CHECK-LABEL: @icmp_mul(
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], -12
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[MUL]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul i32 %x, -12
+ %cmp = icmp sge i32 %mul, 0
+ ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (mul x, C), 0
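+; With nsw and a non-zero C, the product is zero exactly when x is zero.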
+define i1 @icmp_mul_neq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_neq0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_mul_neq0_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_mul_neq0_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %mul = mul nsw <2 x i32> %x, <i32 -12, i32 -12>
+ %cmp = icmp ne <2 x i32> %mul, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_mul_eq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_eq0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul0_eq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul0_eq0(
+; CHECK-NEXT: ret i1 true
+;
+ %mul = mul i32 %x, 0
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_mul0_ne0(i32 %x) {
+; CHECK-LABEL: @icmp_mul0_ne0(
+; CHECK-NEXT: ret i1 false
+;
+ %mul = mul i32 %x, 0
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub1_sge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = add nsw i32 %x, -1
+ %cmp = icmp sge i32 %sub, %y
+ ret i1 %cmp
+}
+
+define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %x, 1
+ %cmp = icmp sgt i32 %add, %y
+ ret i1 %cmp
+}
+
+define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub1_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = add nsw i32 %x, -1
+ %cmp = icmp slt i32 %sub, %y
+ ret i1 %cmp
+}
+
+define i1 @icmp_add1_sle(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_sle(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %x, 1
+ %cmp = icmp sle i32 %add, %y
+ ret i1 %cmp
+}
+
+define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add20_sge_add57(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[Y:%.*]], 37
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %1 = add nsw i32 %x, 20
+ %2 = add nsw i32 %y, 57
+ %cmp = icmp sge i32 %1, %2
+ ret i1 %cmp
+}
+
+define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub57_sge_sub20(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X:%.*]], -37
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %1 = add nsw i32 %x, -57
+ %2 = add nsw i32 %y, -20
+ %cmp = icmp sge i32 %1, %2
+ ret i1 %cmp
+}
+
+define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
+; CHECK-LABEL: @icmp_and_shl_neg_ne_0(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %neg = xor i32 %A, -1
+ %shl = shl i32 1, %B
+ %and = and i32 %shl, %neg
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
+; CHECK-LABEL: @icmp_and_shl_neg_eq_0(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %neg = xor i32 %A, -1
+ %shl = shl i32 1, %B
+ %and = and i32 %shl, %neg
+ %cmp = icmp eq i32 %and, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_add_and_shr_ne_0(i32 %X) {
+; CHECK-LABEL: @icmp_add_and_shr_ne_0(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %shr = lshr i32 %X, 4
+ %and = and i32 %shr, 15
+ %add = add i32 %and, -14
+ %tobool = icmp ne i32 %add, 0
+ ret i1 %tobool
+}
+
+define <2 x i1> @icmp_add_and_shr_ne_0_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_add_and_shr_ne_0_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 240, i32 240>
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne <2 x i32> [[AND]], <i32 224, i32 224>
+; CHECK-NEXT: ret <2 x i1> [[TOBOOL]]
+;
+ %shr = lshr <2 x i32> %X, <i32 4, i32 4>
+ %and = and <2 x i32> %shr, <i32 15, i32 15>
+ %add = add <2 x i32> %and, <i32 -14, i32 -14>
+ %tobool = icmp ne <2 x i32> %add, zeroinitializer
+ ret <2 x i1> %tobool
+}
+
+; Variation of the above with an extra use of the shift
+define i1 @icmp_and_shr_multiuse(i32 %X) {
+; CHECK-LABEL: @icmp_and_shr_multiuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], 496
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432
+; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]]
+; CHECK-NEXT: ret i1 [[AND3]]
+;
+ %shr = lshr i32 %X, 4
+ %and = and i32 %shr, 15
+ %and2 = and i32 %shr, 31 ; second use of the shift
+ %tobool = icmp ne i32 %and, 14
+ %tobool2 = icmp ne i32 %and2, 27
+ %and3 = and i1 %tobool, %tobool2
+ ret i1 %and3
+}
+
+; Variation of the above with an ashr
+define i1 @icmp_and_ashr_multiuse(i32 %X) {
+; CHECK-LABEL: @icmp_and_ashr_multiuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 240
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], 496
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[AND2]], 432
+; CHECK-NEXT: [[AND3:%.*]] = and i1 [[TOBOOL]], [[TOBOOL2]]
+; CHECK-NEXT: ret i1 [[AND3]]
+;
+ %shr = ashr i32 %X, 4
+ %and = and i32 %shr, 15
+ %and2 = and i32 %shr, 31 ; second use of the shift
+ %tobool = icmp ne i32 %and, 14
+ %tobool2 = icmp ne i32 %and2, 27
+ %and3 = and i1 %tobool, %tobool2
+ ret i1 %and3
+}
+
+define i1 @icmp_lshr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_lshr_and_overshift(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ugt i8 [[X:%.*]], 31
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %shr = lshr i8 %X, 5
+ %and = and i8 %shr, 15
+ %tobool = icmp ne i8 %and, 0
+ ret i1 %tobool
+}
+
+; We shouldn't simplify this because the and uses bits that are shifted in.
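+; (for an i8 ashr by 5, bits 3..7 of the result are copies of the sign bit, and the
+; mask 15 reads bit 3, so the test cannot be expressed purely in terms of %X's own bits)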
+define i1 @icmp_ashr_and_overshift(i8 %X) {
+; CHECK-LABEL: @icmp_ashr_and_overshift(
+; CHECK-NEXT: [[SHR:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SHR]], 15
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %shr = ashr i8 %X, 5
+ %and = and i8 %shr, 15
+ %tobool = icmp ne i8 %and, 0
+ ret i1 %tobool
+}
+
+; PR16244
+define i1 @test71(i8* %x) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: ret i1 false
+;
+ %a = getelementptr i8, i8* %x, i64 8
+ %b = getelementptr inbounds i8, i8* %x, i64 8
+ %c = icmp ugt i8* %a, %b
+ ret i1 %c
+}
+
+define i1 @test71_as1(i8 addrspace(1)* %x) {
+; CHECK-LABEL: @test71_as1(
+; CHECK-NEXT: ret i1 false
+;
+ %a = getelementptr i8, i8 addrspace(1)* %x, i64 8
+ %b = getelementptr inbounds i8, i8 addrspace(1)* %x, i64 8
+ %c = icmp ugt i8 addrspace(1)* %a, %b
+ ret i1 %c
+}
+
+define i1 @icmp_shl_1_V_ult_32(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[V:%.*]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp ult i32 %shl, 32
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_ult_32_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_32_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp ult <2 x i32> %shl, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_eq_32(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_eq_32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp eq i32 %shl, 32
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_eq_32_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_eq_32_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp eq <2 x i32> %shl, <i32 32, i32 32>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_ult_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[V:%.*]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp ult i32 %shl, 30
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_ult_30_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_30_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp ult <2 x i32> %shl, <i32 30, i32 30>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_ugt_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ugt_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[V:%.*]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp ugt i32 %shl, 30
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_ugt_30_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ugt_30_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp ugt <2 x i32> %shl, <i32 30, i32 30>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_ule_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ule_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[V:%.*]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp ule i32 %shl, 30
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_ule_30_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ule_30_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp ule <2 x i32> %shl, <i32 30, i32 30>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_uge_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[V:%.*]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp uge i32 %shl, 30
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_uge_30_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_30_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp uge <2 x i32> %shl, <i32 30, i32 30>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V:%.*]], 31
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp uge i32 %shl, 2147483648
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_uge_2147483648_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp uge <2 x i32> %shl, <i32 2147483648, i32 2147483648>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[V:%.*]], 31
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 1, %V
+ %cmp = icmp ult i32 %shl, 2147483648
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_shl_1_V_ult_2147483648_vec(<2 x i32> %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 1, i32 1>, %V
+ %cmp = icmp ult <2 x i32> %shl, <i32 2147483648, i32 2147483648>
+ ret <2 x i1> %cmp
+}
+
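+; (b == 0) | (a u< b) folds to (b - 1) u>= a: when b == 0 the subtraction wraps to -1,
+; which is u>= everything; otherwise a u< b is the same as a u<= b - 1.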
+define i1 @or_icmp_eq_B_0_icmp_ult_A_B(i64 %a, i64 %b) {
+; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B(
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[B:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i64 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = icmp eq i64 %b, 0
+ %2 = icmp ult i64 %a, %b
+ %3 = or i1 %1, %2
+ ret i1 %3
+}
+
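+; (x - 14) u< 2 holds exactly for x in {14, 15}, i.e. (x & -2) == 14.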
+define i1 @icmp_add_ult_2(i32 %X) {
+; CHECK-LABEL: @icmp_add_ult_2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %X, -14
+ %cmp = icmp ult i32 %add, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_add_X_-14_ult_2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 14, i32 14>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %add = add <2 x i32> %X, <i32 -14, i32 -14>
+ %cmp = icmp ult <2 x i32> %add, <i32 2, i32 2>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_sub_3_X_ult_2(i32 %X) {
+; CHECK-LABEL: @icmp_sub_3_X_ult_2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = sub i32 3, %X
+ %cmp = icmp ult i32 %add, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %add = sub <2 x i32> <i32 3, i32 3>, %X
+ %cmp = icmp ult <2 x i32> %add, <i32 2, i32 2>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_add_X_-14_uge_2(i32 %X) {
+; CHECK-LABEL: @icmp_add_X_-14_uge_2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add i32 %X, -14
+ %cmp = icmp uge i32 %add, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_add_X_-14_uge_2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 14, i32 14>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %add = add <2 x i32> %X, <i32 -14, i32 -14>
+ %cmp = icmp uge <2 x i32> %add, <i32 2, i32 2>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_sub_3_X_uge_2(i32 %X) {
+; CHECK-LABEL: @icmp_sub_3_X_uge_2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = sub i32 3, %X
+ %cmp = icmp uge i32 %add, 2
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %add = sub <2 x i32> <i32 3, i32 3>, %X
+ %cmp = icmp uge <2 x i32> %add, <i32 2, i32 2>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_and_X_-16_eq-16(i32 %X) {
+; CHECK-LABEL: @icmp_and_X_-16_eq-16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -17
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %X, -16
+ %cmp = icmp eq i32 %and, -16
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 -17, i32 -17>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i32> %X, <i32 -16, i32 -16>
+ %cmp = icmp eq <2 x i32> %and, <i32 -16, i32 -16>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_and_X_-16_ne-16(i32 %X) {
+; CHECK-LABEL: @icmp_and_X_-16_ne-16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %and = and i32 %X, -16
+ %cmp = icmp ne i32 %and, -16
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 -16, i32 -16>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %and = and <2 x i32> %X, <i32 -16, i32 -16>
+ %cmp = icmp ne <2 x i32> %and, <i32 -16, i32 -16>
+ ret <2 x i1> %cmp
+}
+
+; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524
+; X | C == C --> X <=u C (when C+1 is PowerOf2).
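+; e.g. with C = 1: (x | 1) == 1 holds exactly for x in {0, 1}, i.e. x u< 2.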
+
+define i1 @or1_eq1(i32 %x) {
+; CHECK-LABEL: @or1_eq1(
+; CHECK-NEXT: [[T1:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[T1]]
+;
+ %t0 = or i32 %x, 1
+ %t1 = icmp eq i32 %t0, 1
+ ret i1 %t1
+}
+
+; X | C == C --> X <=u C (when C+1 is PowerOf2).
+
+define <2 x i1> @or3_eq3_vec(<2 x i8> %x) {
+; CHECK-LABEL: @or3_eq3_vec(
+; CHECK-NEXT: [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[T1]]
+;
+ %t0 = or <2 x i8> %x, <i8 3, i8 3>
+ %t1 = icmp eq <2 x i8> %t0, <i8 3, i8 3>
+ ret <2 x i1> %t1
+}
+
+; X | C != C --> X >u C (when C+1 is PowerOf2).
+
+define i1 @or7_ne7(i32 %x) {
+; CHECK-LABEL: @or7_ne7(
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt i32 [[X:%.*]], 7
+; CHECK-NEXT: ret i1 [[T1]]
+;
+ %t0 = or i32 %x, 7
+ %t1 = icmp ne i32 %t0, 7
+ ret i1 %t1
+}
+
+; X | C != C --> X >u C (when C+1 is PowerOf2).
+
+define <2 x i1> @or63_ne63_vec(<2 x i8> %x) {
+; CHECK-LABEL: @or63_ne63_vec(
+; CHECK-NEXT: [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 63, i8 63>
+; CHECK-NEXT: ret <2 x i1> [[T1]]
+;
+ %t0 = or <2 x i8> %x, <i8 63, i8 63>
+ %t1 = icmp ne <2 x i8> %t0, <i8 63, i8 63>
+ ret <2 x i1> %t1
+}
+
+; PR40611: https://bugs.llvm.org/show_bug.cgi?id=40611
+; X | C == C --> (X & ~C) == 0
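+; e.g. with C = 42: (x | 42) == 42 exactly when x has no bits outside 42, i.e. (x & -43) == 0.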
+
+define i1 @orC_eqC(i32 %x) {
+; CHECK-LABEL: @orC_eqC(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -43
+; CHECK-NEXT: [[T1:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[T1]]
+;
+ %t0 = or i32 %x, 42
+ %t1 = icmp eq i32 %t0, 42
+ ret i1 %t1
+}
+
+; X | C == C --> (X & ~C) == 0
+
+define <2 x i1> @orC_eqC_vec(<2 x i8> %x) {
+; CHECK-LABEL: @orC_eqC_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -44, i8 -44>
+; CHECK-NEXT: [[T1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[T1]]
+;
+ %t0 = or <2 x i8> %x, <i8 43, i8 43>
+ %t1 = icmp eq <2 x i8> %t0, <i8 43, i8 43>
+ ret <2 x i1> %t1
+}
+
+; X | C != C --> (X & ~C) != 0
+
+define i1 @orC_neC(i32 %x) {
+; CHECK-LABEL: @orC_neC(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 41
+; CHECK-NEXT: [[T1:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[T1]]
+;
+ %t0 = or i32 %x, -42
+ %t1 = icmp ne i32 %t0, -42
+ ret i1 %t1
+}
+
+; X | C != C --> (X & ~C) != 0
+
+define <2 x i1> @orC_neC_vec(<2 x i8> %x) {
+; CHECK-LABEL: @orC_neC_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT: [[T1:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[T1]]
+;
+ %t0 = or <2 x i8> %x, <i8 -43, i8 -43>
+ %t1 = icmp ne <2 x i8> %t0, <i8 -43, i8 -43>
+ ret <2 x i1> %t1
+}
+
+define i1 @shrink_constant(i32 %X) {
+; CHECK-LABEL: @shrink_constant(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], -12
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[XOR]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %xor = xor i32 %X, -9
+ %cmp = icmp ult i32 %xor, 4
+ ret i1 %cmp
+}
+
+define <2 x i1> @shrink_constant_vec(<2 x i32> %X) {
+; CHECK-LABEL: @shrink_constant_vec(
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -12, i32 -12>
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %xor = xor <2 x i32> %X, <i32 -9, i32 -9>
+ %cmp = icmp ult <2 x i32> %xor, <i32 4, i32 4>
+ ret <2 x i1> %cmp
+}
+
+; This test requires 3 different transforms to get to the result.
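+; (-1 - x is ~x, and ~x u< 4 holds exactly for the four largest unsigned values of x,
+; i.e. x u> -5)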
+define i1 @icmp_sub_-1_X_ult_4(i32 %X) {
+; CHECK-LABEL: @icmp_sub_-1_X_ult_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = sub i32 -1, %X
+ %cmp = icmp ult i32 %sub, 4
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_xor_neg4_X_ult_4_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_xor_neg4_X_ult_4_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 -5, i32 -5>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %xor = xor <2 x i32> %X, <i32 -4, i32 -4>
+ %cmp = icmp ult <2 x i32> %xor, <i32 4, i32 4>
+ ret <2 x i1> %cmp
+}
+
+define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
+; CHECK-LABEL: @icmp_sub_-1_X_uge_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], -4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %sub = sub i32 -1, %X
+ %cmp = icmp uge i32 %sub, 4
+ ret i1 %cmp
+}
+
+define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) {
+; CHECK-LABEL: @icmp_xor_neg4_X_uge_4_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 -4, i32 -4>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %xor = xor <2 x i32> %X, <i32 -4, i32 -4>
+ %cmp = icmp uge <2 x i32> %xor, <i32 4, i32 4>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @xor_ult(<2 x i8> %x) {
+; CHECK-LABEL: @xor_ult(
+; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %xor = xor <2 x i8> %x, <i8 -4, i8 -4>
+ %r = icmp ult <2 x i8> %xor, <i8 -4, i8 -4>
+ ret <2 x i1> %r
+}
+
+define i1 @xor_ult_extra_use(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_ult_extra_use(
+; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[X:%.*]], -32
+; CHECK-NEXT: store i8 [[XOR]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], 31
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor = xor i8 %x, -32
+ store i8 %xor, i8* %p
+ %r = icmp ult i8 %xor, -32
+ ret i1 %r
+}
+
+define <2 x i1> @xor_ugt(<2 x i8> %x) {
+; CHECK-LABEL: @xor_ugt(
+; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %xor = xor <2 x i8> %x, <i8 7, i8 7>
+ %r = icmp ugt <2 x i8> %xor, <i8 7, i8 7>
+ ret <2 x i1> %r
+}
+
+define i1 @xor_ugt_extra_use(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_ugt_extra_use(
+; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[X:%.*]], 63
+; CHECK-NEXT: store i8 [[XOR]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], 63
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xor = xor i8 %x, 63
+ store i8 %xor, i8* %p
+ %r = icmp ugt i8 %xor, 63
+ ret i1 %r
+}
+
+define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_swap_operands_for_cse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: true:
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: false:
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 16
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN:%.*]] = phi i32 [ [[TMP0]], [[TRUE]] ], [ [[TMP1]], [[FALSE]] ]
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
+entry:
+ %sub = sub i32 %X, %Y
+ %cmp = icmp ugt i32 %Y, %X
+ br i1 %cmp, label %true, label %false
+true:
+ %restrue = trunc i32 %sub to i1
+ br label %end
+false:
+ %shift = lshr i32 %sub, 4
+ %resfalse = trunc i32 %shift to i1
+ br label %end
+end:
+ %res = phi i1 [%restrue, %true], [%resfalse, %false]
+ ret i1 %res
+}
+
+define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_swap_operands_for_cse2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: true:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], [[Y]]
+; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[X]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]]
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: false:
+; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[Y]], [[X]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[ADD]], [[TRUE]] ], [ [[SUB2]], [[FALSE]] ]
+; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN_IN]], 1
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
+entry:
+ %cmp = icmp ugt i32 %Y, %X
+ br i1 %cmp, label %true, label %false
+true:
+ %sub = sub i32 %X, %Y
+ %sub1 = sub i32 %X, %Y
+ %add = add i32 %sub, %sub1
+ %restrue = trunc i32 %add to i1
+ br label %end
+false:
+ %sub2 = sub i32 %Y, %X
+ %resfalse = trunc i32 %sub2 to i1
+ br label %end
+end:
+ %res = phi i1 [%restrue, %true], [%resfalse, %false]
+ ret i1 %res
+}
+
+define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: true:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], [[Y]]
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: false:
+; CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[Y]], [[X]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[SUB]], [[TRUE]] ], [ [[SUB2]], [[FALSE]] ]
+; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN_IN]], 1
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES_IN]], 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
+entry:
+ %cmp = icmp ugt i32 %Y, %X
+ br i1 %cmp, label %true, label %false
+true:
+ %sub = sub i32 %X, %Y
+ %restrue = trunc i32 %sub to i1
+ br label %end
+false:
+ %sub2 = sub i32 %Y, %X
+ %resfalse = trunc i32 %sub2 to i1
+ br label %end
+end:
+ %res = phi i1 [%restrue, %true], [%resfalse, %false]
+ ret i1 %res
+}
+
+define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp_lshr_lshr_eq(
+; CHECK-NEXT: [[Z_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ult i32 [[Z_UNSHIFTED]], 1073741824
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = lshr i32 %a, 30
+ %y = lshr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
+}
+
+define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp_ashr_ashr_ne(
+; CHECK-NEXT: [[Z_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt i32 [[Z_UNSHIFTED]], 255
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = ashr i32 %a, 8
+ %y = ashr i32 %b, 8
+ %z = icmp ne i32 %x, %y
+ ret i1 %z
+}
+
+define i1 @icmp_neg_cst_slt(i32 %a) {
+; CHECK-LABEL: @icmp_neg_cst_slt(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], 10
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = sub nsw i32 0, %a
+ %2 = icmp slt i32 %1, -10
+ ret i1 %2
+}
+
+define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_and_or_lshr(
+; CHECK-NEXT: [[SHF1:%.*]] = shl nuw i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[SHF1]], 1
+; CHECK-NEXT: [[AND3:%.*]] = and i32 [[OR2]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[AND3]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %shf = lshr i32 %x, %y
+ %or = or i32 %shf, %x
+ %and = and i32 %or, 1
+ %ret = icmp ne i32 %and, 0
+ ret i1 %ret
+}
+
+define <2 x i1> @icmp_and_or_lshr_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @icmp_and_or_lshr_vec(
+; CHECK-NEXT: [[SHF:%.*]] = lshr <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SHF]], [[X]]
+; CHECK-NEXT: [[RET:%.*]] = trunc <2 x i32> [[OR]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %shf = lshr <2 x i32> %x, %y
+ %or = or <2 x i32> %shf, %x
+ %and = and <2 x i32> %or, <i32 1, i32 1>
+ %ret = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @icmp_and_or_lshr_vec_commute(<2 x i32> %xp, <2 x i32> %y) {
+; CHECK-LABEL: @icmp_and_or_lshr_vec_commute(
+; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
+; CHECK-NEXT: [[SHF:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X]], [[SHF]]
+; CHECK-NEXT: [[RET:%.*]] = trunc <2 x i32> [[OR]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = srem <2 x i32> %xp, <i32 42, i32 -42> ; prevent complexity-based canonicalization
+ %shf = lshr <2 x i32> %x, %y
+ %or = or <2 x i32> %x, %shf
+ %and = and <2 x i32> %or, <i32 1, i32 1>
+ %ret = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %ret
+}
+
+define i1 @icmp_and_or_lshr_cst(i32 %x) {
+; CHECK-LABEL: @icmp_and_or_lshr_cst(
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[AND1]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %shf = lshr i32 %x, 1
+ %or = or i32 %shf, %x
+ %and = and i32 %or, 1
+ %ret = icmp ne i32 %and, 0
+ ret i1 %ret
+}
+
+define <2 x i1> @icmp_and_or_lshr_cst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @icmp_and_or_lshr_cst_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %shf = lshr <2 x i32> %x, <i32 1, i32 1>
+ %or = or <2 x i32> %shf, %x
+ %and = and <2 x i32> %or, <i32 1, i32 1>
+ %ret = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %ret
+}
+
+define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) {
+; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_commute(
+; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 3>
+; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %x = srem <2 x i32> %xp, <i32 42, i32 -42> ; prevent complexity-based canonicalization
+ %shf = lshr <2 x i32> %x, <i32 1, i32 1>
+ %or = or <2 x i32> %x, %shf
+ %and = and <2 x i32> %or, <i32 1, i32 1>
+ %ret = icmp ne <2 x i32> %and, zeroinitializer
+ ret <2 x i1> %ret
+}
+
+define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 29
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 4, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @shl_ap1_zero_ap2_non_zero_2_vec(<2 x i32> %a) {
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 29, i32 29>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shl = shl <2 x i32> <i32 4, i32 4>, %a
+ %cmp = icmp eq <2 x i32> %shl, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) {
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 30
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 -2, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_positive(i32 %a) {
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_negative(i32 %a) {
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 -50, %a
+ %cmp = icmp eq i32 %shl, -50
+ ret i1 %cmp
+}
+
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_1(i32 %a) {
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 25
+ ret i1 %cmp
+}
+
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_2(i32 %a) {
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 25, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_3(i32 %a) {
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl i32 26, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+define i1 @icmp_sgt_zero_add_nsw(i32 %a) {
+; CHECK-LABEL: @icmp_sgt_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sgt i32 %add, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_sge_zero_add_nsw(i32 %a) {
+; CHECK-LABEL: @icmp_sge_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], -2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sge i32 %add, 0
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle_zero_add_nsw(i32 %a) {
+; CHECK-LABEL: @icmp_sle_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sle i32 %add, 0
+ ret i1 %cmp
+}
+
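+; For a strong cmpxchg the loaded value equals the expected value exactly when the
+; exchange succeeded, so the compare is the i1 already present in the result pair.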
+define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
+; CHECK-LABEL: @icmp_cmpxchg_strong(
+; CHECK-NEXT: [[XCHG:%.*]] = cmpxchg i32* [[SC:%.*]], i32 [[OLD_VAL:%.*]], i32 [[NEW_VAL:%.*]] seq_cst seq_cst
+; CHECK-NEXT: [[ICMP:%.*]] = extractvalue { i32, i1 } [[XCHG]], 1
+; CHECK-NEXT: ret i1 [[ICMP]]
+;
+ %xchg = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+ %xtrc = extractvalue { i32, i1 } %xchg, 0
+ %icmp = icmp eq i32 %xtrc, %old_val
+ ret i1 %icmp
+}
+
+define i1 @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT: [[V:%.*]] = icmp sge i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[V]]
+;
+ %t = sub nsw i64 %a, %b
+ %v = icmp sge i64 %t, 0
+ ret i1 %v
+}
+
+define <2 x i1> @f1_vec(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @f1_vec(
+; CHECK-NEXT: [[V:%.*]] = icmp sge <2 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[V]]
+;
+ %t = sub nsw <2 x i64> %a, %b
+ %v = icmp sgt <2 x i64> %t, <i64 -1, i64 -1>
+ ret <2 x i1> %v
+}
+
+define i1 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT: [[V:%.*]] = icmp sgt i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[V]]
+;
+ %t = sub nsw i64 %a, %b
+ %v = icmp sgt i64 %t, 0
+ ret i1 %v
+}
+
+define <2 x i1> @f2_vec(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @f2_vec(
+; CHECK-NEXT: [[V:%.*]] = icmp sgt <2 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[V]]
+;
+ %t = sub nsw <2 x i64> %a, %b
+ %v = icmp sgt <2 x i64> %t, zeroinitializer
+ ret <2 x i1> %v
+}
+
+define i1 @f3(i64 %a, i64 %b) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT: [[V:%.*]] = icmp slt i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[V]]
+;
+ %t = sub nsw i64 %a, %b
+ %v = icmp slt i64 %t, 0
+ ret i1 %v
+}
+
+define <2 x i1> @f3_vec(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @f3_vec(
+; CHECK-NEXT: [[V:%.*]] = icmp slt <2 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[V]]
+;
+ %t = sub nsw <2 x i64> %a, %b
+ %v = icmp slt <2 x i64> %t, zeroinitializer
+ ret <2 x i1> %v
+}
+
+define i1 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: @f4(
+; CHECK-NEXT: [[V:%.*]] = icmp sle i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[V]]
+;
+ %t = sub nsw i64 %a, %b
+ %v = icmp sle i64 %t, 0
+ ret i1 %v
+}
+
+define <2 x i1> @f4_vec(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @f4_vec(
+; CHECK-NEXT: [[V:%.*]] = icmp sle <2 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[V]]
+;
+ %t = sub nsw <2 x i64> %a, %b
+ %v = icmp slt <2 x i64> %t, <i64 1, i64 1>
+ ret <2 x i1> %v
+}
+
+define i32 @f5(i8 %a, i8 %b) {
+; CHECK-LABEL: @f5(
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV3]]
+; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[SUB]], 0
+; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 0, [[SUB]]
+; CHECK-NEXT: [[SUB7_SUB:%.*]] = select i1 [[CMP4]], i32 [[SUB7]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SUB7_SUB]]
+;
+ %conv = zext i8 %a to i32
+ %conv3 = zext i8 %b to i32
+ %sub = sub nsw i32 %conv, %conv3
+ %cmp4 = icmp slt i32 %sub, 0
+ %sub7 = sub nsw i32 0, %sub
+ %sub7.sub = select i1 %cmp4, i32 %sub7, i32 %sub
+ ret i32 %sub7.sub
+}
+
+define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: @f6(
+; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %sext = shl i32 %a, 24
+ %conv = ashr i32 %sext, 24
+ %sext6 = shl i32 %b, 24
+ %conv4 = ashr i32 %sext6, 24
+ %cmp = icmp eq i32 %conv, %conv4
+ %s = select i1 %cmp, i32 10000, i32 0
+ ret i32 %s
+}
+
+define i32 @f7(i32 %a, i32 %b) {
+; CHECK-LABEL: @f7(
+; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 0, i32 10000
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %sext = shl i32 %a, 23
+ %sext6 = shl i32 %b, 23
+ %cmp = icmp ne i32 %sext, %sext6
+ %s = select i1 %cmp, i32 10000, i32 0
+ ret i32 %s
+}
+
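+; (val & (lim - 1)) u< lim is true exactly when lim is non-zero, whether the
+; mask is formed with 'add -1' or 'sub 1'.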
+define i1 @f8(i32 %val, i32 %lim) {
+; CHECK-LABEL: @f8(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[LIM:%.*]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %lim.sub = add i32 %lim, -1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
+define i1 @f9(i32 %val, i32 %lim) {
+; CHECK-LABEL: @f9(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[LIM:%.*]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %lim.sub = sub i32 %lim, 1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
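+; The RHS is a constant expression, so the compare is only canonicalized by
+; swapping its operands (ule becomes uge).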
+define i1 @f10(i16 %p) {
+; CHECK-LABEL: @f10(
+; CHECK-NEXT: [[CMP580:%.*]] = icmp uge i16 [[P:%.*]], mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16))
+; CHECK-NEXT: ret i1 [[CMP580]]
+;
+ %cmp580 = icmp ule i16 mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16)), %p
+ ret i1 %cmp580
+}
+
+; Note: fptosi is used in various tests below to ensure that operand complexity
+; canonicalization does not kick in, which would make some of the tests
+; equivalent to one another.
+
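+; With an nsw add/sub of 1, a compare against (i - 1) or (i + 1) can be
+; rewritten as the equivalent compare against i itself.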
+define i1 @cmp_sgt_rhs_dec(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sgt_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sgt i32 %conv, %dec
+ ret i1 %cmp
+}
+
+define i1 @cmp_sle_rhs_dec(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sle_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sle i32 %conv, %dec
+ ret i1 %cmp
+}
+
+define i1 @cmp_sge_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sge_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp sge i32 %conv, %inc
+ ret i1 %cmp
+}
+
+define i1 @cmp_slt_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_slt_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %conv, %inc
+ ret i1 %cmp
+}
+
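+; Negative test for PR26407: the unsigned compare of the two adds must not be
+; reduced to a compare of %x and %y.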
+define i1 @PR26407(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR26407(
+; CHECK-NEXT: [[ADDX:%.*]] = add i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[ADDY:%.*]] = add i32 [[Y:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[ADDX]], [[ADDY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %addx = add i32 %x, 2147483647
+ %addy = add i32 %y, 2147483647
+ %cmp = icmp uge i32 %addx, %addy
+ ret i1 %cmp
+}
+
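+; (X | C) ==/!= -1 checks that all bits outside of C are set:
+; (X & ~C) ==/!= ~C.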
+define i1 @cmp_inverse_mask_bits_set_eq(i32 %x) {
+; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -43
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %or = or i32 %x, 42
+ %cmp = icmp eq i32 %or, -1
+ ret i1 %cmp
+}
+
+define <2 x i1> @cmp_inverse_mask_bits_set_eq_vec(<2 x i32> %x) {
+; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -43, i32 -43>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 -43, i32 -43>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %or = or <2 x i32> %x, <i32 42, i32 42>
+ %cmp = icmp eq <2 x i32> %or, <i32 -1, i32 -1>
+ ret <2 x i1> %cmp
+}
+
+define i1 @cmp_inverse_mask_bits_set_ne(i32 %x) {
+; CHECK-LABEL: @cmp_inverse_mask_bits_set_ne(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -43
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %or = or i32 %x, 42
+ %cmp = icmp ne i32 %or, -1
+ ret i1 %cmp
+}
+
+; When canonicalizing to 'gt/lt', make sure the constant is correct.
+
+define i1 @PR27792(i128 %a) {
+; CHECK-LABEL: @PR27792(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i128 [[A:%.*]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = icmp sge i128 %a, 0
+ ret i1 %cmp
+}
+
+define i1 @PR27792_2(i128 %a) {
+; CHECK-LABEL: @PR27792_2(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i128 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %b = icmp uge i128 %a, 1
+ ret i1 %b
+}
+
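+; For i8, X u> 127 is true exactly when the sign bit is set, so it becomes X s< 0.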
+define i1 @ugtMaxSignedVal(i8 %a) {
+; CHECK-LABEL: @ugtMaxSignedVal(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = icmp ugt i8 %a, 127
+ ret i1 %cmp
+}
+
+define <2 x i1> @ugtMaxSignedValVec(<2 x i8> %a) {
+; CHECK-LABEL: @ugtMaxSignedValVec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ugt <2 x i8> %a, <i8 127, i8 127>
+ ret <2 x i1> %cmp
+}
+
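+; (X & 17) u> 16 can only hold when every masked bit is set, so it becomes an
+; equality test against 17.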
+define i1 @ugtKnownBits(i8 %a) {
+; CHECK-LABEL: @ugtKnownBits(
+; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], 17
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[B]], 17
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %b = and i8 %a, 17
+ %cmp = icmp ugt i8 %b, 16
+ ret i1 %cmp
+}
+
+define <2 x i1> @ugtKnownBitsVec(<2 x i8> %a) {
+; CHECK-LABEL: @ugtKnownBitsVec(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[A:%.*]], <i8 17, i8 17>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[B]], <i8 17, i8 17>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %b = and <2 x i8> %a, <i8 17, i8 17>
+ %cmp = icmp ugt <2 x i8> %b, <i8 16, i8 16>
+ ret <2 x i1> %cmp
+}
+
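+; The or of the two ptrtoint results is zero only if both pointers are null,
+; so the compare splits into two null checks combined with 'and'.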
+define i1 @or_ptrtoint_mismatch(i8* %p, i32* %q) {
+; CHECK-LABEL: @or_ptrtoint_mismatch(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* [[P:%.*]], null
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32* [[Q:%.*]], null
+; CHECK-NEXT: [[B:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[B]]
+;
+
+ %pp = ptrtoint i8* %p to i64
+ %qq = ptrtoint i32* %q to i64
+ %o = or i64 %pp, %qq
+ %b = icmp eq i64 %o, 0
+ ret i1 %b
+}
+
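+; With nuw, (X + 1) u> Y folds to X u>= Y, and (X + 1) u<= Y folds to X u< Y.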
+define i1 @icmp_add1_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_ugt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nuw i32 %x, 1
+ %cmp = icmp ugt i32 %add, %y
+ ret i1 %cmp
+}
+
+define i1 @icmp_add1_ule(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_ule(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nuw i32 %x, 1
+ %cmp = icmp ule i32 %add, %y
+ ret i1 %cmp
+}
+
+define i1 @cmp_uge_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_uge_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nuw i32 %i, 1
+ %cmp = icmp uge i32 %conv, %inc
+ ret i1 %cmp
+}
+
+define i1 @cmp_ult_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_ult_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[CONV]], [[I:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nuw i32 %i, 1
+ %cmp = icmp ult i32 %conv, %inc
+ ret i1 %cmp
+}
+
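+; When the increment/decrement feeds the LHS of the compare, the compare is
+; left alone (only the sub-by-1 is canonicalized to an add of -1).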
+define i1 @cmp_sge_lhs_inc(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_sge_lhs_inc(
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[INC]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %inc = add nsw i32 %x, 1
+ %cmp = icmp sge i32 %inc, %y
+ ret i1 %cmp
+}
+
+define i1 @cmp_uge_lhs_inc(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_uge_lhs_inc(
+; CHECK-NEXT: [[INC:%.*]] = add nuw i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %inc = add nuw i32 %x, 1
+ %cmp = icmp uge i32 %inc, %y
+ ret i1 %cmp
+}
+
+define i1 @cmp_sgt_lhs_dec(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_sgt_lhs_dec(
+; CHECK-NEXT: [[DEC:%.*]] = add nsw i32 [[X:%.*]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %dec = sub nsw i32 %x, 1
+ %cmp = icmp sgt i32 %dec, %y
+ ret i1 %cmp
+}
+
+define i1 @cmp_ugt_lhs_dec(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_ugt_lhs_dec(
+; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %dec = sub nuw i32 %x, 1
+ %cmp = icmp ugt i32 %dec, %y
+ ret i1 %cmp
+}
+
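+; In the following tests only the compare operands are swapped into canonical
+; order; the add/sub on the RHS remains.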
+define i1 @cmp_sle_rhs_inc(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_sle_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[INC]], [[CONV]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %y, 1
+ %cmp = icmp sle i32 %conv, %inc
+ ret i1 %cmp
+}
+
+define i1 @cmp_ule_rhs_inc(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_ule_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[INC:%.*]] = add nuw i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC]], [[CONV]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %inc = add nuw i32 %y, 1
+ %cmp = icmp ule i32 %conv, %inc
+ ret i1 %cmp
+}
+
+define i1 @cmp_slt_rhs_dec(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_slt_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[DEC:%.*]] = add nsw i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[DEC]], [[CONV]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %y, 1
+ %cmp = icmp slt i32 %conv, %dec
+ ret i1 %cmp
+}
+
+define i1 @cmp_ult_rhs_dec(float %x, i32 %y) {
+; CHECK-LABEL: @cmp_ult_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[X:%.*]] to i32
+; CHECK-NEXT: [[DEC:%.*]] = add i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[DEC]], [[CONV]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = fptosi float %x to i32
+ %dec = sub nuw i32 %y, 1
+ %cmp = icmp ult i32 %conv, %dec
+ ret i1 %cmp
+}
+
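+; Adding the same constant to both sides, or multiplying both sides by the
+; same odd constant, does not change an equality compare.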
+define i1 @eq_add_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_add_constants(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = add i32 %x, 5
+ %B = add i32 %y, 5
+ %C = icmp eq i32 %A, %B
+ ret i1 %C
+}
+
+define i1 @eq_mul_constants(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = mul i32 %x, 5
+ %B = mul i32 %y, 5
+ %C = icmp eq i32 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_splat(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = mul <2 x i32> %x, <i32 5, i32 5>
+ %B = mul <2 x i32> %y, <i32 5, i32 5>
+ %C = icmp ne <2 x i32> %A, %B
+ ret <2 x i1> %C
+}
+
+; If the multiply constant has any trailing zero bits, the compare cannot be reduced to a
+; direct compare of the inputs. Instead, we mask off the high bits of each input and then convert:
+; (X&Z) == (Y&Z) -> ((X^Y) & Z) == 0
+
+define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = mul i32 %x, 12
+ %B = mul i32 %y, 12
+ %C = icmp ne i32 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @eq_mul_constants_with_tz_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = mul <2 x i32> %x, <i32 12, i32 12>
+ %B = mul <2 x i32> %y, <i32 12, i32 12>
+ %C = icmp eq <2 x i32> %A, %B
+ ret <2 x i1> %C
+}
+
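+; bswap and bitreverse are bijective, so equality compares of swapped or
+; reversed values become compares of the original values.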
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @bswap_ne(i32 %x, i32 %y) {
+; CHECK-LABEL: @bswap_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %swapx = call i32 @llvm.bswap.i32(i32 %x)
+ %swapy = call i32 @llvm.bswap.i32(i32 %y)
+ %cmp = icmp ne i32 %swapx, %swapy
+ ret i1 %cmp
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+
+define <8 x i1> @bswap_vec_eq(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @bswap_vec_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <8 x i1> [[CMP]]
+;
+ %swapx = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %x)
+ %swapy = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %y)
+ %cmp = icmp eq <8 x i16> %swapx, %swapy
+ ret <8 x i1> %cmp
+}
+
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i1 @bitreverse_eq(i64 %x, i64 %y) {
+; CHECK-LABEL: @bitreverse_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %revx = call i64 @llvm.bitreverse.i64(i64 %x)
+ %revy = call i64 @llvm.bitreverse.i64(i64 %y)
+ %cmp = icmp eq i64 %revx, %revy
+ ret i1 %cmp
+}
+
+declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
+
+define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @bitreverse_vec_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i16> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <8 x i1> [[CMP]]
+;
+ %revx = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %x)
+ %revy = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %y)
+ %cmp = icmp ne <8 x i16> %revx, %revy
+ ret <8 x i1> %cmp
+}
+
+; These compare a value known to be either 4 or 5 with a value known to be either 5 or 7.
+; They should all simplify to equality compares.
+define i1 @knownbits1(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits1(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 5
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp uge i8 %a2, %b2
+ ret i1 %c
+}
+
+define i1 @knownbits2(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits2(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 5
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp ult i8 %a2, %b2
+ ret i1 %c
+}
+
+define i1 @knownbits3(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits3(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B2]], [[A2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 5
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp ule i8 %b2, %a2
+ ret i1 %c
+}
+
+define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @knownbits4(
+; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[A2:%.*]] = or <2 x i8> [[A1]], <i8 4, i8 4>
+; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
+; CHECK-NEXT: [[B2:%.*]] = or <2 x i8> [[B1]], <i8 5, i8 5>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %a1 = and <2 x i8> %a, <i8 5, i8 5>
+ %a2 = or <2 x i8> %a1, <i8 4, i8 4>
+ %b1 = and <2 x i8> %b, <i8 7, i8 7>
+ %b2 = or <2 x i8> %b1, <i8 5, i8 5>
+ %c = icmp ugt <2 x i8> %b2, %a2
+ ret <2 x i1> %c
+}
+
+; These are the signed versions of the above. One value is less than or equal to 5 but may be negative.
+; The other is known to be either 5 or 7. These should simplify to equality comparisons.
+define i1 @knownbits5(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits5(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 133
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp sge i8 %a2, %b2
+ ret i1 %c
+}
+
+define i1 @knownbits6(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits6(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 133
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp slt i8 %a2, %b2
+ ret i1 %c
+}
+
+define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @knownbits7(
+; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
+; CHECK-NEXT: [[A2:%.*]] = or <2 x i8> [[A1]], <i8 4, i8 4>
+; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
+; CHECK-NEXT: [[B2:%.*]] = or <2 x i8> [[B1]], <i8 5, i8 5>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %a1 = and <2 x i8> %a, <i8 133, i8 133>
+ %a2 = or <2 x i8> %a1, <i8 4, i8 4>
+ %b1 = and <2 x i8> %b, <i8 7, i8 7>
+ %b2 = or <2 x i8> %b1, <i8 5, i8 5>
+ %c = icmp sle <2 x i8> %b2, %a2
+ ret <2 x i1> %c
+}
+
+define i1 @knownbits8(i8 %a, i8 %b) {
+; CHECK-LABEL: @knownbits8(
+; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
+; CHECK-NEXT: [[A2:%.*]] = or i8 [[A1]], 4
+; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
+; CHECK-NEXT: [[B2:%.*]] = or i8 [[B1]], 5
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B2]], [[A2]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a1 = and i8 %a, 133
+ %a2 = or i8 %a1, 4
+ %b1 = and i8 %b, 7
+ %b2 = or i8 %b1, 5
+ %c = icmp sgt i8 %b2, %a2
+ ret i1 %c
+}
+
+; Make sure InstCombine doesn't try too hard to simplify the icmp and break the abs idiom.
+define i32 @abs_preserve(i32 %x) {
+; CHECK-LABEL: @abs_preserve(
+; CHECK-NEXT: [[A:%.*]] = shl nsw i32 [[X:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT: [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[C]], i32 [[NEGA]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+ %a = mul nsw i32 %x, 2
+ %c = icmp sge i32 %a, 0
+ %nega = sub i32 0, %a
+ %abs = select i1 %c, i32 %a, i32 %nega
+ ret i32 %abs
+}
+
+; Don't crash by assuming the compared values are integers.
+
+declare void @llvm.assume(i1)
+define i1 @PR35794(i32* %a) {
+; CHECK-LABEL: @PR35794(
+; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i32* [[A:%.*]], null
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = icmp sgt i32* %a, inttoptr (i64 -1 to i32*)
+ %maskcond = icmp eq i32* %a, null
+ tail call void @llvm.assume(i1 %maskcond)
+ ret i1 %cmp
+}
+
+; Don't crash by assuming the compared values are integers.
+define <2 x i1> @PR36583(<2 x i8*>) {
+; CHECK-LABEL: @PR36583(
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8*> [[TMP0:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %cast = ptrtoint <2 x i8*> %0 to <2 x i64>
+ %res = icmp eq <2 x i64> %cast, zeroinitializer
+ ret <2 x i1> %res
+}
+
+; Fold (icmp pred (sub 0, X), C1) for vector types.
+define <2 x i32> @Op1Negated_Vec(<2 x i32> %x) {
+; CHECK-LABEL: @Op1Negated_Vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+ ret <2 x i32> %cond
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll b/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll
new file mode 100644
index 00000000000..174c4b906e3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp_sdiv_with_and_without_range.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Test that the presence of a range does not cause unprofitable transforms with bit
+; arithmetic, and that instcombine behaves the same as it does without the range.
+
+define i1 @without_range(i32* %A) {
+; CHECK-LABEL: @without_range(
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A:%.*]], align 8
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A_VAL]], 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A.val = load i32, i32* %A, align 8
+ %B = sdiv i32 %A.val, 2
+ %C = icmp sge i32 0, %B
+ ret i1 %C
+}
+
+define i1 @with_range(i32* %A) {
+; CHECK-LABEL: @with_range(
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A:%.*]], align 8, !range !0
+; CHECK-NEXT: [[B_MASK:%.*]] = and i32 [[A_VAL]], 2147483646
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A.val = load i32, i32* %A, align 8, !range !0
+ %B = sdiv i32 %A.val, 2
+ %C = icmp sge i32 0, %B
+ ret i1 %C
+}
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/llvm/test/Transforms/InstCombine/idioms.ll b/llvm/test/Transforms/InstCombine/idioms.ll
new file mode 100644
index 00000000000..58485442230
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/idioms.ll
@@ -0,0 +1,32 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Check that code corresponding to the following C function is
+; simplified into a single ASR operation:
+;
+; int test_asr(int a, int b) {
+; return a < 0 ? -(-a - 1 >> b) - 1 : a >> b;
+; }
+;
+define i32 @test_asr(i32 %a, i32 %b) {
+entry:
+ %c = icmp slt i32 %a, 0
+ br i1 %c, label %bb2, label %bb3
+
+bb2:
+ %t1 = sub i32 0, %a
+ %not = sub i32 %t1, 1
+ %d = ashr i32 %not, %b
+ %t2 = sub i32 0, %d
+ %not2 = sub i32 %t2, 1
+ br label %bb4
+bb3:
+ %e = ashr i32 %a, %b
+ br label %bb4
+bb4:
+ %f = phi i32 [ %not2, %bb2 ], [ %e, %bb3 ]
+ ret i32 %f
+; CHECK-LABEL: @test_asr(
+; CHECK: bb4:
+; CHECK: %f = ashr i32 %a, %b
+; CHECK: ret i32 %f
+}
diff --git a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
new file mode 100644
index 00000000000..71afed438d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
@@ -0,0 +1,207 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+
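+; The pointer compare controlling the loop is rewritten as a compare of the
+; GEP index, and the result pointer is recomputed from the index at the exit.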
+define i32 *@test1(i32* %A, i32 %Offset) {
+entry:
+ %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test1(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+define i32 *@test2(i32 %A, i32 %Offset) {
+entry:
+ %A.ptr = inttoptr i32 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cmp0 = ptrtoint i32 *%LHS to i32
+ %cmp1 = ptrtoint i32 *%RHS to i32
+ %cond = icmp ult i32 %cmp0, %cmp1
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test2(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32*
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+; Perform the transformation only if we know that the GEPs used are inbounds.
+define i32 *@test3(i32* %A, i32 %Offset) {
+entry:
+ %tmp = getelementptr i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr i32, i32* %A, i32 100
+ %RHS.next = getelementptr i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test3(
+; CHECK-NOT: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
+}
+
+; An inttoptr that requires an extension or truncation is opaque when determining
+; the base pointer. In this case we can still perform the transformation by treating
+; %A.ptr as the base pointer.
+define i32 *@test4(i16 %A, i32 %Offset) {
+entry:
+ %A.ptr = inttoptr i16 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cmp0 = ptrtoint i32 *%LHS to i32
+ %cmp1 = ptrtoint i32 *%RHS to i32
+ %cond = icmp ult i32 %cmp0, %cmp1
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test4(
+; CHECK: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
+}
+
+declare i32* @fun_ptr()
+
+define i32 *@test5(i32 %Offset) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %A = invoke i32 *@fun_ptr() to label %cont unwind label %lpad
+
+cont:
+ %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %cont ]
+ %LHS = getelementptr inbounds i32, i32* %A, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+lpad:
+ %l = landingpad { i8*, i32 } cleanup
+ ret i32* null
+
+; CHECK-LABEL: @test5(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %cont ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+declare i32 @fun_i32()
+
+define i32 *@test6(i32 %Offset) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %A = invoke i32 @fun_i32() to label %cont unwind label %lpad
+
+cont:
+ %A.ptr = inttoptr i32 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %cont ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+lpad:
+ %l = landingpad { i8*, i32 } cleanup
+ ret i32* null
+
+; CHECK-LABEL: @test6(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %cont ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32*
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+
+@pr30402 = constant i64 3
+define i1 @test7() {
+entry:
+ br label %bb7
+
+bb7: ; preds = %bb10, %entry-block
+ %phi = phi i64* [ @pr30402, %entry ], [ getelementptr inbounds (i64, i64* @pr30402, i32 1), %bb7 ]
+ %cmp = icmp eq i64* %phi, getelementptr inbounds (i64, i64* @pr30402, i32 1)
+ br i1 %cmp, label %bb10, label %bb7
+
+bb10:
+ ret i1 %cmp
+}
+; CHECK-LABEL: @test7(
+; CHECK: %[[phi:.*]] = phi i64* [ @pr30402, %entry ], [ getelementptr inbounds (i64, i64* @pr30402, i32 1), %bb7 ]
+; CHECK: %[[cmp:.*]] = icmp eq i64* %[[phi]], getelementptr inbounds (i64, i64* @pr30402, i32 1)
+; CHECK: ret i1 %[[cmp]]
+
+
+declare i32 @__gxx_personality_v0(...)
+
+define i1 @test8(i64* %in, i64 %offset) {
+entry:
+
+ %ld = load i64, i64* %in, align 8
+ %casti8 = inttoptr i64 %ld to i8*
+ %gepi8 = getelementptr inbounds i8, i8* %casti8, i64 %offset
+ %cast = bitcast i8* %gepi8 to i32**
+ %ptrcast = inttoptr i64 %ld to i32**
+ %gepi32 = getelementptr inbounds i32*, i32** %ptrcast, i64 1
+ %cmp = icmp eq i32** %gepi32, %cast
+ ret i1 %cmp
+
+
+; CHECK-LABEL: @test8(
+; CHECK-NOT: icmp eq i32 %{{[0-9A-Za-z.]+}}, 1
+}
diff --git a/llvm/test/Transforms/InstCombine/inline-intrinsic-assert.ll b/llvm/test/Transforms/InstCombine/inline-intrinsic-assert.ll
new file mode 100644
index 00000000000..8eecb3fd40a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/inline-intrinsic-assert.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; PR22857: http://llvm.org/bugs/show_bug.cgi?id=22857
+; The inliner should not add an edge to an intrinsic and
+; then assert that it did not add an edge to an intrinsic!
+
+define float @foo(float %f1) {
+ %call = call float @bar(float %f1)
+ ret float %call
+
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: call fast float @llvm.fabs.f32
+; CHECK-NEXT: ret float
+}
+
+define float @bar(float %f1) {
+ %call = call float @sqr(float %f1)
+ %call1 = call fast float @sqrtf(float %call)
+ ret float %call1
+}
+
+define float @sqr(float %f) {
+ %mul = fmul fast float %f, %f
+ ret float %mul
+}
+
+declare float @sqrtf(float)
+
diff --git a/llvm/test/Transforms/InstCombine/inselt-binop.ll b/llvm/test/Transforms/InstCombine/inselt-binop.ll
new file mode 100644
index 00000000000..882a1318c68
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/inselt-binop.ll
@@ -0,0 +1,635 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
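+; These tests exercise binary ops where one operand is a scalar inserted into
+; an undef vector, both with and without an undef lane in the constant operand.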
+define <2 x i8> @add_constant(i8 %x) {
+; CHECK-LABEL: @add_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 42, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = add <2 x i8> %ins, <i8 42, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @add_constant_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @add_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 42, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = add <2 x i8> %ins, <i8 42, i8 -42>
+ ret <2 x i8> %bo
+}
+
+; IR flags are not required, but they should propagate.
+
+define <2 x i8> @sub_constant_op0(i8 %x) {
+; CHECK-LABEL: @sub_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <2 x i8> <i8 undef, i8 -42>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = sub nsw nuw <2 x i8> <i8 undef, i8 -42>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sub_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @sub_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = sub nuw <2 x i8> <i8 42, i8 -42>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = sub nuw <2 x i8> <i8 42, i8 -42>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sub_constant_op1(i8 %x) {
+; CHECK-LABEL: @sub_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 -42, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = sub nuw <2 x i8> %ins, <i8 42, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sub_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @sub_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = add <2 x i8> [[INS]], <i8 -42, i8 42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = sub nuw <2 x i8> %ins, <i8 42, i8 -42>
+ ret <2 x i8> %bo
+}
+
+define <3 x i8> @mul_constant(i8 %x) {
+; CHECK-LABEL: @mul_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 2
+; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[INS]], <i8 undef, i8 undef, i8 -42>
+; CHECK-NEXT: ret <3 x i8> [[BO]]
+;
+ %ins = insertelement <3 x i8> undef, i8 %x, i32 2
+ %bo = mul <3 x i8> %ins, <i8 undef, i8 undef, i8 -42>
+ ret <3 x i8> %bo
+}
+
+define <3 x i8> @mul_constant_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @mul_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 2
+; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[INS]], <i8 42, i8 undef, i8 -42>
+; CHECK-NEXT: ret <3 x i8> [[BO]]
+;
+ %ins = insertelement <3 x i8> undef, i8 %x, i32 2
+ %bo = mul <3 x i8> %ins, <i8 42, i8 undef, i8 -42>
+ ret <3 x i8> %bo
+}
+
+define <2 x i8> @shl_constant_op0(i8 %x) {
+; CHECK-LABEL: @shl_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = shl <2 x i8> <i8 undef, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = shl <2 x i8> <i8 undef, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @shl_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @shl_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = shl <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = shl <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @shl_constant_op1(i8 %x) {
+; CHECK-LABEL: @shl_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i8> [[INS]], <i8 5, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = shl nuw <2 x i8> %ins, <i8 5, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @shl_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @shl_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = shl nuw <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @ashr_constant_op0(i8 %x) {
+; CHECK-LABEL: @ashr_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i8> <i8 undef, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = ashr exact <2 x i8> <i8 undef, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @ashr_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @ashr_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = ashr exact <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @ashr_constant_op1(i8 %x) {
+; CHECK-LABEL: @ashr_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i8> [[INS]], <i8 5, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = ashr <2 x i8> %ins, <i8 5, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @ashr_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @ashr_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = ashr <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @lshr_constant_op0(i8 %x) {
+; CHECK-LABEL: @lshr_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 undef>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = lshr <2 x i8> <i8 5, i8 undef>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @lshr_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @lshr_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = lshr <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @lshr_constant_op1(i8 %x) {
+; CHECK-LABEL: @lshr_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i8> [[INS]], <i8 undef, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = lshr exact <2 x i8> %ins, <i8 undef, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @lshr_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @lshr_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = lshr exact <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @urem_constant_op0(i8 %x) {
+; CHECK-LABEL: @urem_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> <i8 5, i8 undef>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = urem <2 x i8> <i8 5, i8 undef>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @urem_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @urem_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = urem <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @urem_constant_op1(i8 %x) {
+; CHECK-LABEL: @urem_constant_op1(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = urem <2 x i8> %ins, <i8 undef, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @urem_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @urem_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = urem <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = urem <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @srem_constant_op0(i8 %x) {
+; CHECK-LABEL: @srem_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> <i8 5, i8 undef>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = srem <2 x i8> <i8 5, i8 undef>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @srem_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @srem_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = srem <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @srem_constant_op1(i8 %x) {
+; CHECK-LABEL: @srem_constant_op1(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = srem <2 x i8> %ins, <i8 undef, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @srem_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @srem_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = srem <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = srem <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @udiv_constant_op0(i8 %x) {
+; CHECK-LABEL: @udiv_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i8> <i8 5, i8 undef>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = udiv exact <2 x i8> <i8 5, i8 undef>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @udiv_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @udiv_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = udiv exact <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @udiv_constant_op1(i8 %x) {
+; CHECK-LABEL: @udiv_constant_op1(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = udiv <2 x i8> %ins, <i8 undef, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @udiv_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @udiv_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = udiv <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sdiv_constant_op0(i8 %x) {
+; CHECK-LABEL: @sdiv_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i8> <i8 5, i8 undef>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = sdiv <2 x i8> <i8 5, i8 undef>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sdiv_constant_op0_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i8> <i8 5, i8 2>, [[INS]]
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = sdiv <2 x i8> <i8 5, i8 2>, %ins
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sdiv_constant_op1(i8 %x) {
+; CHECK-LABEL: @sdiv_constant_op1(
+; CHECK-NEXT: ret <2 x i8> undef
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = sdiv exact <2 x i8> %ins, <i8 undef, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @sdiv_constant_op1_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i8> [[INS]], <i8 5, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = sdiv exact <2 x i8> %ins, <i8 5, i8 2>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @and_constant(i8 %x) {
+; CHECK-LABEL: @and_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = and <2 x i8> [[INS]], <i8 42, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = and <2 x i8> %ins, <i8 42, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @and_constant_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @and_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = and <2 x i8> [[INS]], <i8 42, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = and <2 x i8> %ins, <i8 42, i8 -42>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @or_constant(i8 %x) {
+; CHECK-LABEL: @or_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = or <2 x i8> [[INS]], <i8 undef, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = or <2 x i8> %ins, <i8 undef, i8 -42>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @or_constant_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @or_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = or <2 x i8> [[INS]], <i8 42, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 1
+ %bo = or <2 x i8> %ins, <i8 42, i8 -42>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @xor_constant(i8 %x) {
+; CHECK-LABEL: @xor_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = xor <2 x i8> [[INS]], <i8 42, i8 undef>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = xor <2 x i8> %ins, <i8 42, i8 undef>
+ ret <2 x i8> %bo
+}
+
+define <2 x i8> @xor_constant_not_undef_lane(i8 %x) {
+; CHECK-LABEL: @xor_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = xor <2 x i8> [[INS]], <i8 42, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[BO]]
+;
+ %ins = insertelement <2 x i8> undef, i8 %x, i32 0
+ %bo = xor <2 x i8> %ins, <i8 42, i8 -42>
+ ret <2 x i8> %bo
+}
+
+define <2 x float> @fadd_constant(float %x) {
+; CHECK-LABEL: @fadd_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fadd <2 x float> %ins, <float 42.0, float undef>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fadd_constant_not_undef_lane(float %x) {
+; CHECK-LABEL: @fadd_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = fadd <2 x float> %ins, <float 42.0, float -42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fsub_constant_op0(float %x) {
+; CHECK-LABEL: @fsub_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x float> <float 4.200000e+01, float undef>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fsub fast <2 x float> <float 42.0, float undef>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fsub_constant_op0_not_undef_lane(float %x) {
+; CHECK-LABEL: @fsub_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = fsub nsz <2 x float> <float 42.0, float -42.0>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fsub_constant_op1(float %x) {
+; CHECK-LABEL: @fsub_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float 0x7FF8000000000000, float -4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = fsub <2 x float> %ins, <float undef, float 42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fsub_constant_op1_not_undef_lane(float %x) {
+; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fadd <2 x float> [[INS]], <float -4.200000e+01, float 4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fsub <2 x float> %ins, <float 42.0, float -42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fmul_constant(float %x) {
+; CHECK-LABEL: @fmul_constant(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x float> [[INS]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fmul reassoc <2 x float> %ins, <float 42.0, float undef>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fmul_constant_not_undef_lane(float %x) {
+; CHECK-LABEL: @fmul_constant_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = fmul <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = fmul <2 x float> %ins, <float 42.0, float -42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fdiv_constant_op0(float %x) {
+; CHECK-LABEL: @fdiv_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x float> <float undef, float 4.200000e+01>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = fdiv nnan <2 x float> <float undef, float 42.0>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fdiv_constant_op0_not_undef_lane(float %x) {
+; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fdiv ninf <2 x float> <float 42.0, float -42.0>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fdiv_constant_op1(float %x) {
+; CHECK-LABEL: @fdiv_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[INS]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fdiv <2 x float> %ins, <float 42.0, float undef>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @fdiv_constant_op1_not_undef_lane(float %x) {
+; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = fdiv <2 x float> %ins, <float 42.0, float -42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @frem_constant_op0(float %x) {
+; CHECK-LABEL: @frem_constant_op0(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x float> <float 4.200000e+01, float undef>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = frem fast <2 x float> <float 42.0, float undef>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @frem_constant_op0_not_undef_lane(float %x) {
+; CHECK-LABEL: @frem_constant_op0_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = frem <2 x float> <float 4.200000e+01, float -4.200000e+01>, [[INS]]
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = frem <2 x float> <float 42.0, float -42.0>, %ins
+ ret <2 x float> %bo
+}
+
+define <2 x float> @frem_constant_op1(float %x) {
+; CHECK-LABEL: @frem_constant_op1(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 1
+; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x float> [[INS]], <float undef, float 4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 1
+ %bo = frem ninf <2 x float> %ins, <float undef, float 42.0>
+ ret <2 x float> %bo
+}
+
+define <2 x float> @frem_constant_op1_not_undef_lane(float %x) {
+; CHECK-LABEL: @frem_constant_op1_not_undef_lane(
+; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x float> [[INS]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[BO]]
+;
+ %ins = insertelement <2 x float> undef, float %x, i32 0
+ %bo = frem nnan <2 x float> %ins, <float 42.0, float -42.0>
+ ret <2 x float> %bo
+}
+
diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
new file mode 100644
index 00000000000..3e301e336af
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; Eliminate the insertelement.
+
+define <4 x float> @PR29126(<4 x float> %x) {
+; CHECK-LABEL: @PR29126(
+; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 4.200000e+01>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x float> [[INS]]
+;
+ %shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %ins = insertelement <4 x float> %shuf, float 42.0, i32 3
+ ret <4 x float> %ins
+}
+
+; A chain of inserts should collapse.
+
+define <4 x float> @twoInserts(<4 x float> %x) {
+; CHECK-LABEL: @twoInserts(
+; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.000000e+00, float 4.200000e+01, float 1.100000e+01>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x float> [[INS2]]
+;
+ %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %ins1 = insertelement <4 x float> %shuf, float 42.0, i32 2
+ %ins2 = insertelement <4 x float> %ins1, float 11.0, i32 3
+ ret <4 x float> %ins2
+}
+
+define <4 x i32> @shuffleRetain(<4 x i32> %base) {
+; CHECK-LABEL: @shuffleRetain(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %base, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %base, <4 x i32> <i32 4, i32 3, i32 2, i32 1>, <4 x i32> <i32 1, i32 2, i32 undef, i32 7>
+ ret <4 x i32> %shuf
+}
+
+; TODO: Transform an arbitrary shuffle with a constant into a shuffle that is equivalent to a vector select.
+
+define <4 x float> @disguisedSelect(<4 x float> %x) {
+; CHECK-LABEL: @disguisedSelect(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 undef, i32 6, i32 5, i32 3>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i32 0
+; CHECK-NEXT: ret <4 x float> [[INS]]
+;
+ %shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float 3.0>, <4 x i32> <i32 7, i32 6, i32 5, i32 3>
+ %ins = insertelement <4 x float> %shuf, float 4.0, i32 0
+ ret <4 x float> %ins
+}
+
+; TODO: Fold arbitrary (non-select-equivalent) shuffles if the new shuffle would have the same shuffle mask.
+
+define <4 x float> @notSelectButNoMaskDifference(<4 x float> %x) {
+; CHECK-LABEL: @notSelectButNoMaskDifference(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 1, i32 5, i32 6, i32 undef>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i32 3
+; CHECK-NEXT: ret <4 x float> [[INS]]
+;
+ %shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float 3.0>, <4 x i32> <i32 1, i32 5, i32 6, i32 3>
+ %ins = insertelement <4 x float> %shuf, float 4.0, i32 3
+ ret <4 x float> %ins
+}
+
+; We purposely do not touch arbitrary (non-select-equivalent) shuffles because folding the insert may create a more expensive shuffle.
+
+define <4 x float> @tooRisky(<4 x float> %x) {
+; CHECK-LABEL: @tooRisky(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 1, i32 4, i32 4, i32 undef>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i32 3
+; CHECK-NEXT: ret <4 x float> [[INS]]
+;
+ %shuf = shufflevector <4 x float> %x, <4 x float> <float 1.0, float undef, float undef, float undef>, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %ins = insertelement <4 x float> %shuf, float 4.0, i32 3
+ ret <4 x float> %ins
+}
+
+; Don't transform insert to shuffle if the original shuffle is not removed.
+; TODO: Ease the one-use restriction if the insert scalar would simplify the shuffle to a full vector constant?
+
+define <3 x float> @twoShufUses(<3 x float> %x) {
+; CHECK-LABEL: @twoShufUses(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> %x, <3 x float> <float undef, float 1.000000e+00, float 2.000000e+00>, <3 x i32> <i32 0, i32 4, i32 5>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x float> [[SHUF]], float 4.200000e+01, i2 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd <3 x float> [[SHUF]], [[INS]]
+; CHECK-NEXT: ret <3 x float> [[ADD]]
+;
+ %shuf = shufflevector <3 x float> %x, <3 x float> <float undef, float 1.0, float 2.0>, <3 x i32> <i32 0, i32 4, i32 5>
+ %ins = insertelement <3 x float> %shuf, float 42.0, i2 1
+ %add = fadd <3 x float> %shuf, %ins
+ ret <3 x float> %add
+}
+
+; The index of the inserted scalar constant is out of bounds for the shuffle vector constant.
+
+define <5 x i8> @longerMask(<3 x i8> %x) {
+; CHECK-LABEL: @longerMask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> %x, <3 x i8> <i8 undef, i8 1, i8 undef>, <5 x i32> <i32 2, i32 1, i32 4, i32 undef, i32 undef>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <5 x i8> [[SHUF]], i8 42, i17 4
+; CHECK-NEXT: ret <5 x i8> [[INS]]
+;
+ %shuf = shufflevector <3 x i8> %x, <3 x i8> <i8 undef, i8 1, i8 2>, <5 x i32> <i32 2, i32 1, i32 4, i32 3, i32 0>
+ %ins = insertelement <5 x i8> %shuf, i8 42, i17 4
+ ret <5 x i8> %ins
+}
+
+; TODO: The inserted constant could get folded into the shuffle vector constant.
+
+define <3 x i8> @shorterMask(<5 x i8> %x) {
+; CHECK-LABEL: @shorterMask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> %x, <5 x i8> undef, <3 x i32> <i32 undef, i32 1, i32 4>
+; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> [[SHUF]], i8 42, i21 0
+; CHECK-NEXT: ret <3 x i8> [[INS]]
+;
+ %shuf = shufflevector <5 x i8> %x, <5 x i8> <i8 undef, i8 1, i8 2, i8 3, i8 4>, <3 x i32> <i32 2, i32 1, i32 4>
+ %ins = insertelement <3 x i8> %shuf, i8 42, i21 0
+ ret <3 x i8> %ins
+}
+
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
new file mode 100644
index 00000000000..2de9c66d463
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -0,0 +1,427 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+define <1 x i8> @test1(<8 x i8> %in) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[VEC:%.*]] = shufflevector <8 x i8> [[IN:%.*]], <8 x i8> undef, <1 x i32> <i32 5>
+; CHECK-NEXT: ret <1 x i8> [[VEC]]
+;
+ %val = extractelement <8 x i8> %in, i32 5
+ %vec = insertelement <1 x i8> undef, i8 %val, i32 0
+ ret <1 x i8> %vec
+}
+
+define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[VEC_3:%.*]] = shufflevector <8 x i16> [[IN2:%.*]], <8 x i16> [[IN:%.*]], <4 x i32> <i32 11, i32 9, i32 0, i32 10>
+; CHECK-NEXT: ret <4 x i16> [[VEC_3]]
+;
+ %elt0 = extractelement <8 x i16> %in, i32 3
+ %elt1 = extractelement <8 x i16> %in, i32 1
+ %elt2 = extractelement <8 x i16> %in2, i32 0
+ %elt3 = extractelement <8 x i16> %in, i32 2
+
+ %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
+ %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1
+ %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2
+ %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3
+
+ ret <4 x i16> %vec.3
+}
+
+define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: @test_vcopyq_lane_p64(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %elt = extractelement <1 x i64> %b, i32 0
+ %res = insertelement <2 x i64> %a, i64 %elt, i32 1
+ ret <2 x i64> %res
+}
+
+; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109
+
+define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[I2:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x float> [[I2]]
+;
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %e2 = extractelement <2 x float> %ext, i32 1
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 1
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 3
+ ret <4 x float> %i2
+}
+
+define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
+; CHECK-LABEL: @widen_extract3(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: [[I3:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x float> [[I3]]
+;
+ %e1 = extractelement <3 x float> %ext, i32 0
+ %e2 = extractelement <3 x float> %ext, i32 1
+ %e3 = extractelement <3 x float> %ext, i32 2
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 2
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 1
+ %i3 = insertelement <4 x float> %i2, float %e3, i32 0
+ ret <4 x float> %i3
+}
+
+define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract4(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[I1:%.*]] = shufflevector <8 x float> [[INS:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x float> [[I1]]
+;
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %i1 = insertelement <8 x float> %ins, float %e1, i32 2
+ ret <8 x float> %i1
+}
+
+; PR26015: https://llvm.org/bugs/show_bug.cgi?id=26015
+; The widening shuffle must be inserted before any uses.
+
+define <8 x i16> @pr26015(<4 x i16> %t0) {
+; CHECK-LABEL: @pr26015(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT: ret <8 x i16> [[T5]]
+;
+ %t1 = extractelement <4 x i16> %t0, i32 2
+ %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
+ %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
+ %t4 = extractelement <4 x i16> %t0, i32 3
+ %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
+ ret <8 x i16> %t5
+}
+
+; PR25999: https://llvm.org/bugs/show_bug.cgi?id=25999
+; TODO: The widening shuffle could be inserted at the start of the function to allow the first extract to use it.
+
+define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) {
+; CHECK-LABEL: @pr25999(
+; CHECK-NEXT: [[T1:%.*]] = extractelement <4 x i16> [[T0:%.*]], i32 2
+; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 [[T1]], i32 3
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT: ret <8 x i16> [[T5]]
+; CHECK: end:
+; CHECK-NEXT: [[A1:%.*]] = add i16 [[T1]], 4
+; CHECK-NEXT: [[T6:%.*]] = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 [[A1]], i32 0
+; CHECK-NEXT: ret <8 x i16> [[T6]]
+;
+
+ %t1 = extractelement <4 x i16> %t0, i32 2
+ br i1 %b, label %if, label %end
+
+if:
+ %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
+ %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
+ %t4 = extractelement <4 x i16> %t0, i32 3
+ %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
+ ret <8 x i16> %t5
+
+end:
+ %a1 = add i16 %t1, 4
+ %t6 = insertelement <8 x i16> zeroinitializer, i16 %a1, i32 0
+ ret <8 x i16> %t6
+}
+
+; The widening shuffle must be inserted at a valid point (after the PHIs).
+
+define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @pr25999_phis1(
+; CHECK-NEXT: bb1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> [[A:%.*]])
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x double> [[TMP4]]
+;
+bb1:
+ br i1 %c, label %bb2, label %bb3
+
+bb2:
+ %r = call <2 x double> @dummy(<2 x double> %a)
+ br label %bb3
+
+bb3:
+ %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+ %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+ %tmp3 = extractelement <2 x double> %tmp1, i32 0
+ %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
+ ret <4 x double> %tmp4
+}
+
+declare <2 x double> @dummy(<2 x double>)
+
+define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @pr25999_phis2(
+; CHECK-NEXT: bb1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[R:%.*]] = call <2 x double> @dummy(<2 x double> [[A:%.*]])
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ]
+; CHECK-NEXT: [[D:%.*]] = fadd <2 x double> [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x double> [[TMP4]]
+;
+bb1:
+ br i1 %c, label %bb2, label %bb3
+
+bb2:
+ %r = call <2 x double> @dummy(<2 x double> %a)
+ br label %bb3
+
+bb3:
+ %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+ %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+ %d = fadd <2 x double> %tmp1, %tmp1
+ %tmp3 = extractelement <2 x double> %d, i32 0
+ %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
+ ret <4 x double> %tmp4
+}
+
+; PR26354: https://llvm.org/bugs/show_bug.cgi?id=26354
+; Don't create a shufflevector if we know that we're not going to replace the insertelement.
+
+define double @pr26354(<2 x double>* %tmp, i1 %B) {
+; CHECK-LABEL: @pr26354(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[TMP:%.*]], align 16
+; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x double> [[LD]], i32 0
+; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[E2:%.*]] = extractelement <2 x double> [[LD]], i32 1
+; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, double [[E2]], i32 3
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[PH:%.*]] = phi <4 x double> [ undef, [[ENTRY:%.*]] ], [ [[I1]], [[IF]] ]
+; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x double> [[PH]], i32 1
+; CHECK-NEXT: [[MU:%.*]] = fmul double [[E1]], [[E3]]
+; CHECK-NEXT: ret double [[MU]]
+;
+
+entry:
+ %ld = load <2 x double>, <2 x double>* %tmp
+ %e1 = extractelement <2 x double> %ld, i32 0
+ %e2 = extractelement <2 x double> %ld, i32 1
+ br i1 %B, label %if, label %end
+
+if:
+ %i1 = insertelement <4 x double> zeroinitializer, double %e2, i32 3
+ br label %end
+
+end:
+ %ph = phi <4 x double> [ undef, %entry ], [ %i1, %if ]
+ %e3 = extractelement <4 x double> %ph, i32 1
+ %mu = fmul double %e1, %e3
+ ret double %mu
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=30923
+; Delete the widening shuffle if we're not going to reduce the extract/insert to a shuffle.
+
+define <4 x float> @PR30923(<2 x float> %x) {
+; CHECK-LABEL: @PR30923(
+; CHECK-NEXT: bb1:
+; CHECK-NEXT: [[EXT1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; CHECK-NEXT: store float [[EXT1]], float* undef, align 4
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[EXT2:%.*]] = extractelement <2 x float> [[X]], i32 0
+; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, float [[EXT2]], i32 2
+; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[EXT1]], i32 3
+; CHECK-NEXT: ret <4 x float> [[INS2]]
+;
+bb1:
+ %ext1 = extractelement <2 x float> %x, i32 1
+ store float %ext1, float* undef, align 4
+ br label %bb2
+
+bb2:
+ %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %ext2 = extractelement <4 x float> %widen, i32 0
+ %ins1 = insertelement <4 x float> <float 0.0, float 0.0, float undef, float undef>, float %ext2, i32 2
+ %ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
+ ret <4 x float> %ins2
+}
+
+; Don't insert extractelements from the wider vector before the def of the index operand.
+
+define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
+; CHECK-LABEL: @extractelt_insertion(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
+; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[RET]]
+;
+entry:
+ %a = extractelement <2 x i32> %x, i32 1
+ %b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
+ %c = add i32 %y, 3
+ %d = extractelement <2 x i32> %x, i32 %c
+ %e = icmp eq i32 %d, 0
+ %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; PR34724: https://bugs.llvm.org/show_bug.cgi?id=34724
+
+define <4 x float> @collectShuffleElts(<2 x float> %x, float %y) {
+; CHECK-LABEL: @collectShuffleElts(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
+; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 1
+; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[X1]], i32 2
+; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[Y:%.*]], i32 3
+; CHECK-NEXT: ret <4 x float> [[V3]]
+;
+ %x0 = extractelement <2 x float> %x, i32 0
+ %x1 = extractelement <2 x float> %x, i32 1
+ %v1 = insertelement <4 x float> undef, float %x0, i32 1
+ %v2 = insertelement <4 x float> %v1, float %x1, i32 2
+ %v3 = insertelement <4 x float> %v2, float %y, i32 3
+ ret <4 x float> %v3
+}
+
+; Simplest case - insert scalar into undef, then shuffle that value in place into another vector.
+
+define <4 x float> @insert_shuffle(float %x, <4 x float> %y) {
+; CHECK-LABEL: @insert_shuffle(
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv = insertelement <4 x float> undef, float %x, i32 0
+ %r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %r
+}
+
+; Insert scalar into some element of a dummy vector, then move it to a different element in another vector.
+
+define <4 x float> @insert_shuffle_translate(float %x, <4 x float> %y) {
+; CHECK-LABEL: @insert_shuffle_translate(
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv = insertelement <4 x float> undef, float %x, i32 0
+ %r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ ret <4 x float> %r
+}
+
+; The vector operand of the insert is irrelevant.
+
+define <4 x float> @insert_not_undef_shuffle_translate(float %x, <4 x float> %y, <4 x float> %q) {
+; CHECK-LABEL: @insert_not_undef_shuffle_translate(
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv = insertelement <4 x float> %q, float %x, i32 3
+ %r = shufflevector <4 x float> %xv, <4 x float> %y, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+ ret <4 x float> %r
+}
+
+; The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements.
+
+define <4 x float> @insert_not_undef_shuffle_translate_commute(float %x, <4 x float> %y, <4 x float> %q) {
+; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute(
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv = insertelement <4 x float> %q, float %x, i32 2
+ %r = shufflevector <4 x float> %y, <4 x float> %xv, <4 x i32> <i32 0, i32 6, i32 2, i32 undef>
+ ret <4 x float> %r
+}
+
+; Both shuffle operands may be inserts - choose the correct side.
+
+define <4 x float> @insert_insert_shuffle_translate(float %x1, float %x2, <4 x float> %q) {
+; CHECK-LABEL: @insert_insert_shuffle_translate(
+; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X2:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV2]], float [[X1:%.*]], i32 1
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv1 = insertelement <4 x float> %q, float %x1, i32 0
+ %xv2 = insertelement <4 x float> %q, float %x2, i32 2
+ %r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+ ret <4 x float> %r
+}
+
+; Both shuffle operands may be inserts - choose the correct side.
+
+define <4 x float> @insert_insert_shuffle_translate_commute(float %x1, float %x2, <4 x float> %q) {
+; CHECK-LABEL: @insert_insert_shuffle_translate_commute(
+; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[XV1]], float [[X2:%.*]], i32 1
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv1 = insertelement <4 x float> %q, float %x1, i32 0
+ %xv2 = insertelement <4 x float> %q, float %x2, i32 2
+ %r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+ ret <4 x float> %r
+}
+
+; Negative test - this only works if the shuffle is choosing exactly 1 element from 1 of the inputs.
+; TODO: But this could be a special case because we're inserting into the same base vector.
+
+define <4 x float> @insert_insert_shuffle_translate_wrong_mask(float %x1, float %x2, <4 x float> %q) {
+; CHECK-LABEL: @insert_insert_shuffle_translate_wrong_mask(
+; CHECK-NEXT: [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
+; CHECK-NEXT: [[XV2:%.*]] = insertelement <4 x float> [[Q]], float [[X2:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> <i32 0, i32 6, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv1 = insertelement <4 x float> %q, float %x1, i32 0
+ %xv2 = insertelement <4 x float> %q, float %x2, i32 2
+ %r = shufflevector <4 x float> %xv1, <4 x float> %xv2, <4 x i32> <i32 0, i32 6, i32 2, i32 7>
+ ret <4 x float> %r
+}
+
+; The insert may have other uses.
+
+declare void @use(<4 x float>)
+
+define <4 x float> @insert_not_undef_shuffle_translate_commute_uses(float %x, <4 x float> %y, <4 x float> %q) {
+; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_uses(
+; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X:%.*]], i32 2
+; CHECK-NEXT: call void @use(<4 x float> [[XV]])
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X]], i32 0
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %xv = insertelement <4 x float> %q, float %x, i32 2
+ call void @use(<4 x float> %xv)
+ %r = shufflevector <4 x float> %y, <4 x float> %xv, <4 x i32> <i32 6, i32 undef, i32 2, i32 3>
+ ret <4 x float> %r
+}
+
+; Negative test - size-changing shuffle.
+
+define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) {
+; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen(
+; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> <i32 0, i32 6, i32 2, i32 undef, i32 undef>
+; CHECK-NEXT: ret <5 x float> [[R]]
+;
+ %xv = insertelement <4 x float> %q, float %x, i32 2
+ %r = shufflevector <4 x float> %y, <4 x float> %xv, <5 x i32> <i32 0, i32 6, i32 2, i32 undef, i32 undef>
+ ret <5 x float> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll b/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll
new file mode 100644
index 00000000000..db7b4031f37
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/insert-val-extract-elem.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; CHECK-LABEL: julia_2xdouble
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <2 x double>
+define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
+top:
+ %x = load <2 x double>, <2 x double>* %1
+ %x0 = extractelement <2 x double> %x, i32 0
+ %i0 = insertvalue [2 x double] undef, double %x0, 0
+ %x1 = extractelement <2 x double> %x, i32 1
+ %i1 = insertvalue [2 x double] %i0, double %x1, 1
+ store [2 x double] %i1, [2 x double]* %0, align 4
+ ret void
+}
+
+; Test with two inserts to the same index
+; CHECK-LABEL: julia_2xi64
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <2 x i64>
+define void @julia_2xi64([2 x i64]* sret, <2 x i64>*) {
+top:
+ %x = load <2 x i64>, <2 x i64>* %1
+ %x0 = extractelement <2 x i64> %x, i32 1
+ %i0 = insertvalue [2 x i64] undef, i64 %x0, 0
+ %x1 = extractelement <2 x i64> %x, i32 1
+ %i1 = insertvalue [2 x i64] %i0, i64 %x1, 1
+ %x2 = extractelement <2 x i64> %x, i32 0
+ %i2 = insertvalue [2 x i64] %i1, i64 %x2, 0
+ store [2 x i64] %i2, [2 x i64]* %0, align 4
+ ret void
+}
+
+; CHECK-LABEL: julia_4xfloat
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
+top:
+ %x = load <4 x float>, <4 x float>* %1
+ %x0 = extractelement <4 x float> %x, i32 0
+ %i0 = insertvalue [4 x float] undef, float %x0, 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %i1 = insertvalue [4 x float] %i0, float %x1, 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %i2 = insertvalue [4 x float] %i1, float %x2, 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %i3 = insertvalue [4 x float] %i2, float %x3, 3
+ store [4 x float] %i3, [4 x float]* %0, align 4
+ ret void
+}
+
+%pseudovec = type { float, float, float, float }
+
+; CHECK-LABEL: julia_pseudovec
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
+top:
+ %x = load <4 x float>, <4 x float>* %1
+ %x0 = extractelement <4 x float> %x, i32 0
+ %i0 = insertvalue %pseudovec undef, float %x0, 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %i1 = insertvalue %pseudovec %i0, float %x1, 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %i2 = insertvalue %pseudovec %i1, float %x2, 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %i3 = insertvalue %pseudovec %i2, float %x3, 3
+ store %pseudovec %i3, %pseudovec* %0, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/int_sideeffect.ll b/llvm/test/Transforms/InstCombine/int_sideeffect.ll
new file mode 100644
index 00000000000..6355c4557ef
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/int_sideeffect.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+declare void @llvm.sideeffect()
+
+; Store-to-load forwarding across a @llvm.sideeffect.
+
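+; The intrinsic is modeled as not reading or writing user-visible memory (it
+; only needs to remain in place as an observable marker), so it should not
+; block forwarding the stored value to the load.
+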
+; CHECK-LABEL: s2l
+; CHECK-NOT: load
+define float @s2l(float* %p) {
+ store float 0.0, float* %p
+ call void @llvm.sideeffect()
+ %t = load float, float* %p
+ ret float %t
+}
diff --git a/llvm/test/Transforms/InstCombine/intersect-accessgroup.ll b/llvm/test/Transforms/InstCombine/intersect-accessgroup.ll
new file mode 100644
index 00000000000..858b9b6e59b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intersect-accessgroup.ll
@@ -0,0 +1,113 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+;
+; void func(long n, double A[static const restrict n]) {
+; for (int i = 0; i < n; i+=1)
+; for (int j = 0; j < n;j+=1)
+; for (int k = 0; k < n; k += 1)
+; for (int l = 0; l < n; l += 1) {
+; double *p = &A[i + j + k + l];
+; double x = *p;
+; double y = *p;
+; arg(x + y);
+; }
+; }
+;
+; Check that instcombine correctly merges access group metadata when it
+; combines the two loads: only loops common to both accesses remain
+; parallel, i.e. the intersection of the access-group lists.
+; Note that the combined load would also be parallel to loop !16 since both
+; original loads are parallel to it, but !16 references two access groups
+; (!8 and !9), neither of which contains both loads. As a result, the
+; information that the combined load is parallel to !16 is lost.
+;
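+; A minimal sketch of the expected merge (using the access groups defined at
+; the bottom of this file; the CHECK lines there are the authoritative
+; expectation):
+;
+;   %0 = load double, ... !llvm.access.group !1   ; !1 = !{!7, !9}
+;   %1 = load double, ... !llvm.access.group !2   ; !2 = !{!7, !8}
+;   ==> the loads are combined into one load that carries only the common
+;       group:  load double, ... !llvm.access.group !7
+;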
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @arg(double)
+
+define void @func(i64 %n, double* noalias nonnull %A) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i32 [ 0, %entry ], [ %add31, %for.inc30 ]
+ %conv = sext i32 %i.0 to i64
+ %cmp = icmp slt i64 %conv, %n
+ br i1 %cmp, label %for.cond2, label %for.end32
+
+for.cond2:
+ %j.0 = phi i32 [ %add28, %for.inc27 ], [ 0, %for.cond ]
+ %conv3 = sext i32 %j.0 to i64
+ %cmp4 = icmp slt i64 %conv3, %n
+ br i1 %cmp4, label %for.cond8, label %for.inc30
+
+for.cond8:
+ %k.0 = phi i32 [ %add25, %for.inc24 ], [ 0, %for.cond2 ]
+ %conv9 = sext i32 %k.0 to i64
+ %cmp10 = icmp slt i64 %conv9, %n
+ br i1 %cmp10, label %for.cond14, label %for.inc27
+
+for.cond14:
+ %l.0 = phi i32 [ %add23, %for.body19 ], [ 0, %for.cond8 ]
+ %conv15 = sext i32 %l.0 to i64
+ %cmp16 = icmp slt i64 %conv15, %n
+ br i1 %cmp16, label %for.body19, label %for.inc24
+
+for.body19:
+ %add = add nsw i32 %i.0, %j.0
+ %add20 = add nsw i32 %add, %k.0
+ %add21 = add nsw i32 %add20, %l.0
+ %idxprom = sext i32 %add21 to i64
+ %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom
+ %0 = load double, double* %arrayidx, align 8, !llvm.access.group !1
+ %1 = load double, double* %arrayidx, align 8, !llvm.access.group !2
+ %add22 = fadd double %0, %1
+ call void @arg(double %add22), !llvm.access.group !3
+ %add23 = add nsw i32 %l.0, 1
+ br label %for.cond14, !llvm.loop !11
+
+for.inc24:
+ %add25 = add nsw i32 %k.0, 1
+ br label %for.cond8, !llvm.loop !14
+
+for.inc27:
+ %add28 = add nsw i32 %j.0, 1
+ br label %for.cond2, !llvm.loop !16
+
+for.inc30:
+ %add31 = add nsw i32 %i.0, 1
+ br label %for.cond, !llvm.loop !18
+
+for.end32:
+ ret void
+}
+
+
+; access groups
+!7 = distinct !{}
+!8 = distinct !{}
+!9 = distinct !{}
+
+; access group lists
+!1 = !{!7, !9}
+!2 = !{!7, !8}
+!3 = !{!7, !8, !9}
+
+!11 = distinct !{!11, !13}
+!13 = !{!"llvm.loop.parallel_accesses", !7}
+
+!14 = distinct !{!14, !15}
+!15 = !{!"llvm.loop.parallel_accesses", !8}
+
+!16 = distinct !{!16, !17}
+!17 = !{!"llvm.loop.parallel_accesses", !8, !9}
+
+!18 = distinct !{!18, !19}
+!19 = !{!"llvm.loop.parallel_accesses", !9}
+
+
+; CHECK: load double, {{.*}} !llvm.access.group ![[ACCESSGROUP_0:[0-9]+]]
+; CHECK: br label %for.cond14, !llvm.loop ![[LOOP_4:[0-9]+]]
+
+; CHECK: ![[ACCESSGROUP_0]] = distinct !{}
+
+; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[PARALLEL_ACCESSES_5:[0-9]+]]}
+; CHECK: ![[PARALLEL_ACCESSES_5]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_0]]}
diff --git a/llvm/test/Transforms/InstCombine/intptr1.ll b/llvm/test/Transforms/InstCombine/intptr1.ll
new file mode 100644
index 00000000000..3d8f915eb43
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr1.ll
@@ -0,0 +1,193 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
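+; These tests check that an integer phi whose value is only used through
+; inttoptr (and recreated via ptrtoint) is rewritten into a pointer phi, so
+; the casts disappear; the CHECK lines below spell out the exact expectation
+; for each case.
+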
+define void @test1(float* %a, float* readnone %a_end, i64* %b.i64) {
+; CHECK-LABEL: @test1
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b = load i64, i64* %b.i64, align 8
+; CHECK: load float*, float**
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ]
+
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ],
+; CHECK-NOT: %b.addr.02 = phi i64
+
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK-NOT: inttoptr i64
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %tmp, i64 1
+ %add.int = ptrtoint float* %add to i64
+; CHECK: %add = getelementptr
+; CHECK-NOT: ptrtoint float*
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @test1_neg(float* %a, float* readnone %a_end, i64* %b.i64) {
+; CHECK-LABEL: @test1_neg
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b = load i64, i64* %b.i64, align 8
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %bb ], [ %b, %for.body.preheader ]
+
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %bb ], [ %a, %for.body.preheader ]
+; CHECK: %b.addr.02 = phi i64
+
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK: inttoptr i64
+ %ptrcmp = icmp ult float* %tmp, %a_end
+ br i1 %ptrcmp, label %for.end, label %bb
+
+bb:
+ %tmp1 = load float, float* %a, align 4
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %a, i64 1
+ %add.int = ptrtoint float* %add to i64
+; CHECK: ptrtoint float*
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+define void @test2(float* %a, float* readnone %a_end, float** %b.float) {
+; CHECK-LABEL: @test2
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b.i64 = bitcast float** %b.float to i64*
+ %b = load i64, i64* %b.i64, align 8
+; CHECK: load float*, float**
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ]
+
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ],
+; CHECK-NOT: %b.addr.02 = phi i64
+
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK-NOT: inttoptr i64
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %tmp, i64 1
+; CHECK: %add =
+ %add.int = ptrtoint float* %add to i64
+; CHECK-NOT: ptrtoint float*
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+define void @test3(float* %a, float* readnone %a_end, i8** %b.i8p) {
+; CHECK-LABEL: @test3
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b.i64 = bitcast i8** %b.i8p to i64*
+ %b = load i64, i64* %b.i64, align 8
+; CHECK: load float*, float**
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ]
+
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK: %b.addr.02.ptr = phi float* [ %add, %for.body ],
+; CHECK-NOT: %b.addr.02 = phi i64
+
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK-NOT: inttoptr i64
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %tmp, i64 1
+; CHECK: %add = getelementptr
+ %add.int = ptrtoint float* %add to i64
+; CHECK-NOT: ptrtoint float*
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+define void @test4(float* %a, float* readnone %a_end, float** %b.float) {
+entry:
+; CHECK-LABEL: @test4
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b.f = load float*, float** %b.float, align 8
+ %b = ptrtoint float* %b.f to i64
+; CHECK: load float*, float**
+; CHECK-NOT: ptrtoint float*
+ br label %for.body
+; CHECK: br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ]
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK-NOT: inttoptr i64
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %tmp, i64 1
+; CHECK: %add =
+ %add.int = ptrtoint float* %add to i64
+; CHECK-NOT: ptrtoint float*
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/intptr2.ll b/llvm/test/Transforms/InstCombine/intptr2.ll
new file mode 100644
index 00000000000..b105a722749
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr2.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test1(float* %a, float* readnone %a_end, i32* %b.i) {
+; CHECK-LABEL: @test1
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b = ptrtoint i32 * %b.i to i64
+; CHECK: bitcast
+; CHECK-NOT: ptrtoint
+ br label %for.body
+; CHECK: br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ]
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK-NOT: phi i64
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK-NOT: inttoptr
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %add = getelementptr inbounds float, float* %tmp, i64 1
+; CHECK: %add =
+ %add.int = ptrtoint float* %add to i64
+; CHECK-NOT: ptrtoint
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/intptr3.ll b/llvm/test/Transforms/InstCombine/intptr3.ll
new file mode 100644
index 00000000000..72b81ce350f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr3.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr {
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ %b.float = inttoptr i64 %b to float*
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ]
+ %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ]
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK-NEXT: %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ]
+; CHECK-NEXT: = load float
+ %l = load float, float* %b.addr.float, align 4
+ %mul.i = fmul float %l, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+; CHECK: store float
+ %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float*
+; CHECK-NOT: inttoptr
+ %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1
+; CHECK: %b.addr.float.inc =
+ %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64
+; CHECK-NOT: ptrtoint
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/llvm/test/Transforms/InstCombine/intptr4.ll b/llvm/test/Transforms/InstCombine/intptr4.ll
new file mode 100644
index 00000000000..663090f06e8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr4.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(float* %a, float* readnone %a_end, i64 %b, float* %bf) unnamed_addr {
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ %b.float = inttoptr i64 %b to float*
+ br i1 %cmp1, label %bb1, label %bb2
+
+bb1:
+ br label %for.body.preheader
+bb2:
+ %bfi = ptrtoint float* %bf to i64
+ br label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %b.phi = phi i64 [%b, %bb1], [%bfi, %bb2]
+ br label %for.body
+; CHECK: for.body.preheader
+; CHECK: %b.phi = phi
+; CHECK: %b.phi.ptr =
+; CHECK: br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK: for.body
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+ %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ]
+ %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b.phi, %for.body.preheader ]
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK-NEXT: %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ]
+; CHECK-NEXT: %b.addr.i64.ptr = phi
+; CHECK-NOT: = phi i64
+; CHECK: = load
+ %l = load float, float* %b.addr.float, align 4
+ %mul.i = fmul float %l, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float*
+ %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1
+; CHECK: store float %mul.i
+; CHECK-NOT: inttoptr
+; CHECK: %b.addr.float.inc =
+ %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64
+; CHECK-NOT: ptrtoint
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/llvm/test/Transforms/InstCombine/intptr5.ll b/llvm/test/Transforms/InstCombine/intptr5.ll
new file mode 100644
index 00000000000..c5e728f1441
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr5.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(float* %a, float* readnone %a_end, i64 %b, float* %bf) unnamed_addr {
+entry:
+ %cmp1 = icmp ult float* %a, %a_end
+ %b.float = inttoptr i64 %b to float*
+ br i1 %cmp1, label %bb1, label %bb2
+
+bb1:
+ br label %for.body.preheader
+bb2:
+ %bfi = ptrtoint float* %bf to i64
+ br label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %b.phi = phi i64 [%b, %bb1], [%bfi, %bb2]
+ switch i64 %b, label %for.body [
+ i64 1, label %for.body
+ ]
+; CHECK: for.body.preheader
+; CHECK: %b.phi = phi
+; CHECK: %b.phi.ptr =
+; CHECK-NOT: %b.phi.ptr2 =
+; CHECK: switch
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK: for.body
+ %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ], [%a, %for.body.preheader]
+ %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ], [%b.float, %for.body.preheader]
+ %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b.phi, %for.body.preheader ], [ %b.phi, %for.body.preheader]
+; CHECK: %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ]
+; CHECK-NEXT: %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], [ %b.float, %for.body.preheader ]
+; CHECK-NEXT: %b.addr.i64.ptr = phi
+; CHECK-NOT: = %b.addr.i64
+; CHECK: = load
+ %l = load float, float* %b.addr.float, align 4
+ %mul.i = fmul float %l, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float*
+ %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1
+; CHECK: store float %mul.i
+; CHECK-NOT: inttoptr
+; CHECK: %b.addr.float.inc =
+ %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64
+; CHECK-NOT: ptrtoint
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1
+; CHECK: %incdec.ptr =
+ %cmp = icmp ult float* %incdec.ptr, %a_end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/llvm/test/Transforms/InstCombine/intptr6.ll b/llvm/test/Transforms/InstCombine/intptr6.ll
new file mode 100644
index 00000000000..9c29145ab16
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr6.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -instcombine -S
+; no crash
+
+%A = type { %B }
+%B = type { %C *}
+%C = type <{ i32 (...)**, i32, [4 x i8] }>
+
+$foo = comdat any
+
+@bar= external thread_local global %A, align 8
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: inlinehint sanitize_memory uwtable
+define void @foo() local_unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %0 = load %C*, %C** getelementptr inbounds (%A, %A* @bar, i64 0, i32 0, i32 0), align 8
+ %1 = ptrtoint %C* %0 to i64
+ %count.i.i.i23 = getelementptr inbounds %C, %C* %0, i64 0, i32 1
+ store i32 0, i32* %count.i.i.i23, align 8
+ %2 = invoke i8* @_Znwm() #3
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %call.i25 = invoke i8* @_Znwm() #3
+ to label %call.i.noexc unwind label %lpad4
+
+call.i.noexc: ; preds = %invoke.cont
+ invoke void @lazy()
+ to label %invoke.cont5 unwind label %lpad.i
+
+lpad.i: ; preds = %call.i.noexc
+ %3 = landingpad { i8*, i32 }
+ cleanup
+ br label %ehcleanup
+
+invoke.cont5: ; preds = %call.i.noexc
+ %4 = ptrtoint i8* %call.i25 to i64
+ invoke void @scale()
+ to label %invoke.cont16 unwind label %lpad15
+
+invoke.cont16: ; preds = %invoke.cont5
+ ret void
+
+lpad: ; preds = %entry
+ %5 = landingpad { i8*, i32 }
+ cleanup
+ unreachable
+
+lpad4: ; preds = %invoke.cont
+ %6 = landingpad { i8*, i32 }
+ cleanup
+ unreachable
+
+ehcleanup: ; preds = %lpad.i
+ br label %ehcleanup21
+
+lpad15: ; preds = %invoke.cont5
+ %7 = landingpad { i8*, i32 }
+ cleanup
+ br label %ehcleanup21
+
+ehcleanup21: ; preds = %lpad15, %ehcleanup
+ %actual_other.sroa.0.0 = phi i64 [ %1, %ehcleanup ], [ %4, %lpad15 ]
+ %8 = inttoptr i64 %actual_other.sroa.0.0 to %C*
+ br i1 undef, label %_ZN4CGAL6HandleD2Ev.exit, label %land.lhs.true.i
+
+land.lhs.true.i: ; preds = %ehcleanup21
+ %count.i = getelementptr inbounds %C, %C* %8, i64 0, i32 1
+ %9 = load i32, i32* %count.i, align 8
+ unreachable
+
+_ZN4CGAL6HandleD2Ev.exit: ; preds = %ehcleanup21
+ resume { i8*, i32 } undef
+}
+
+; Function Attrs: nobuiltin
+declare noalias nonnull i8* @_Znwm() local_unnamed_addr #1
+
+; Function Attrs: sanitize_memory uwtable
+declare void @scale() local_unnamed_addr #2 align 2
+
+; Function Attrs: sanitize_memory uwtable
+declare void @lazy() unnamed_addr #2 align 2
+
+attributes #0 = { inlinehint sanitize_memory uwtable}
+attributes #1 = { nobuiltin }
+attributes #2 = { sanitize_memory uwtable }
+attributes #3 = { builtin }
+
diff --git a/llvm/test/Transforms/InstCombine/intptr7.ll b/llvm/test/Transforms/InstCombine/intptr7.ll
new file mode 100644
index 00000000000..1e83bacd65b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intptr7.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @matching_phi(i64 %a, float* %b, i1 %cond) {
+; CHECK-LABEL: @matching_phi
+entry:
+ %cmp1 = icmp eq i1 %cond, 0
+ %add.int = add i64 %a, 1
+ %add = inttoptr i64 %add.int to float *
+
+ %addb = getelementptr inbounds float, float* %b, i64 2
+ %addb.int = ptrtoint float* %addb to i64
+ br i1 %cmp1, label %A, label %B
+A:
+ br label %C
+B:
+ store float 1.0e+01, float* %add, align 4
+ br label %C
+
+C:
+ %a.addr.03 = phi float* [ %addb, %A ], [ %add, %B ]
+ %b.addr.02 = phi i64 [ %addb.int, %A ], [ %add.int, %B ]
+ %tmp = inttoptr i64 %b.addr.02 to float*
+; CHECK: %a.addr.03 = phi
+; CHECK-NEXT: = load
+ %tmp1 = load float, float* %tmp, align 4
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ ret void
+}
+
+define void @no_matching_phi(i64 %a, float* %b, i1 %cond) {
+; CHECK-LABEL: @no_matching_phi
+entry:
+ %cmp1 = icmp eq i1 %cond, 0
+ %add.int = add i64 %a, 1
+ %add = inttoptr i64 %add.int to float *
+
+ %addb = getelementptr inbounds float, float* %b, i64 2
+ %addb.int = ptrtoint float* %addb to i64
+ br i1 %cmp1, label %A, label %B
+A:
+ br label %C
+B:
+ store float 1.0e+01, float* %add, align 4
+ br label %C
+
+C:
+ %a.addr.03 = phi float* [ %addb, %A ], [ %add, %B ]
+ %b.addr.02 = phi i64 [ %addb.int, %B ], [ %add.int, %A ]
+ %tmp = inttoptr i64 %b.addr.02 to float*
+ %tmp1 = load float, float* %tmp, align 4
+; CHECK: %a.addr.03 = phi
+; CHECK-NEXT: %b.addr.02.ptr = phi
+; CHECK-NEXT: = load
+ %mul.i = fmul float %tmp1, 4.200000e+01
+ store float %mul.i, float* %a.addr.03, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll
new file mode 100644
index 00000000000..157c14bbd95
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -0,0 +1,427 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare double @llvm.powi.f64(double, i32) nounwind readonly
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
+declare double @llvm.cos.f64(double %Val) nounwind readonly
+declare double @llvm.sin.f64(double %Val) nounwind readonly
+declare double @llvm.floor.f64(double %Val) nounwind readonly
+declare double @llvm.ceil.f64(double %Val) nounwind readonly
+declare double @llvm.trunc.f64(double %Val) nounwind readonly
+declare double @llvm.rint.f64(double %Val) nounwind readonly
+declare double @llvm.nearbyint.f64(double %Val) nounwind readonly
+
+define void @powi(double %V, double *%P) {
+ %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
+ store volatile double %A, double* %P
+
+ %D = tail call double @llvm.powi.f64(double %V, i32 2) nounwind
+ store volatile double %D, double* %P
+ ret void
+; CHECK-LABEL: @powi(
+; CHECK: %A = fdiv double 1.0{{.*}}, %V
+; CHECK: store volatile double %A,
+; CHECK: %D = fmul double %V, %V
+; CHECK: store volatile double %D
+}
+
+define i32 @cttz(i32 %a) {
+; CHECK-LABEL: @cttz(
+; CHECK-NEXT: ret i32 3
+;
+ %or = or i32 %a, 8
+ %and = and i32 %or, -8
+ %count = tail call i32 @llvm.cttz.i32(i32 %and, i1 true) nounwind readnone
+ ret i32 %count
+}
+
+define <2 x i32> @cttz_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_vec(
+; CHECK-NEXT: ret <2 x i32> <i32 3, i32 3>
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %and = and <2 x i32> %or, <i32 -8, i32 -8>
+ %count = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %and, i1 true) nounwind readnone
+ ret <2 x i32> %count
+}
+
+; Make sure we don't add range metadata to i1 cttz.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
+define i1 @cttz_knownbits(i32 %arg) {
+; CHECK-LABEL: @cttz_knownbits(
+; CHECK-NEXT: ret i1 false
+;
+ %or = or i32 %arg, 4
+ %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
+ %res = icmp eq i32 %cnt, 4
+ ret i1 %res
+}
+
+define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i32> %cnt, <i32 4, i32 4>
+ ret <2 x i1> %res
+}
+
+define i32 @cttz_knownbits2(i32 %arg) {
+; CHECK-LABEL: @cttz_knownbits2(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
+; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[$CTTZ_RANGE:[0-9]+]]
+; CHECK-NEXT: ret i32 [[CNT]]
+;
+ %or = or i32 %arg, 4
+ %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
+ ret i32 %cnt
+}
+
+define <2 x i32> @cttz_knownbits2_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits2_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i32> [[CNT]]
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ ret <2 x i32> %cnt
+}
+
+define i1 @cttz_knownbits3(i32 %arg) {
+; CHECK-LABEL: @cttz_knownbits3(
+; CHECK-NEXT: ret i1 false
+;
+ %or = or i32 %arg, 4
+ %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
+ %res = icmp eq i32 %cnt, 3
+ ret i1 %res
+}
+
+define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits3_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i32> %arg, <i32 4, i32 4>
+ %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
+define i8 @ctlz(i8 %a) {
+; CHECK-LABEL: @ctlz(
+; CHECK-NEXT: ret i8 2
+;
+ %or = or i8 %a, 32
+ %and = and i8 %or, 63
+ %count = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) nounwind readnone
+ ret i8 %count
+}
+
+define <2 x i8> @ctlz_vec(<2 x i8> %a) {
+; CHECK-LABEL: @ctlz_vec(
+; CHECK-NEXT: ret <2 x i8> <i8 2, i8 2>
+;
+ %or = or <2 x i8> %a, <i8 32, i8 32>
+ %and = and <2 x i8> %or, <i8 63, i8 63>
+ %count = tail call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %and, i1 true) nounwind readnone
+ ret <2 x i8> %count
+}
+
+; Make sure we don't add range metadata to i1 ctlz.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
+define i1 @ctlz_knownbits(i8 %arg) {
+; CHECK-LABEL: @ctlz_knownbits(
+; CHECK-NEXT: ret i1 false
+;
+ %or = or i8 %arg, 32
+ %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
+ %res = icmp eq i8 %cnt, 4
+ ret i1 %res
+}
+
+define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i8> %cnt, <i8 4, i8 4>
+ ret <2 x i1> %res
+}
+
+define i8 @ctlz_knownbits2(i8 %arg) {
+; CHECK-LABEL: @ctlz_knownbits2(
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[$CTLZ_RANGE:[0-9]+]]
+; CHECK-NEXT: ret i8 [[CNT]]
+;
+ %or = or i8 %arg, 32
+ %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
+ ret i8 %cnt
+}
+
+define <2 x i8> @ctlz_knownbits2_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits2_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i8> [[CNT]]
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ ret <2 x i8> %cnt
+}
+
+define i1 @ctlz_knownbits3(i8 %arg) {
+; CHECK-LABEL: @ctlz_knownbits3(
+; CHECK-NEXT: ret i1 false
+;
+ %or = or i8 %arg, 32
+ %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
+ %res = icmp eq i8 %cnt, 3
+ ret i1 %res
+}
+
+define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits3_vec(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %or = or <2 x i8> %arg, <i8 32, i8 32>
+ %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+ %res = icmp eq <2 x i8> %cnt, <i8 3, i8 3>
+ ret <2 x i1> %res
+}
+
+define i32 @ctlz_undef(i32 %Value) {
+; CHECK-LABEL: @ctlz_undef(
+; CHECK-NEXT: ret i32 undef
+;
+ %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
+ ret i32 %ctlz
+}
+
+define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) {
+; CHECK-LABEL: @ctlz_undef_vec(
+; CHECK-NEXT: ret <2 x i32> undef
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true)
+ ret <2 x i32> %ctlz
+}
+
+define i32 @ctlz_make_undef(i32 %a) {
+ %or = or i32 %a, 8
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
+ ret i32 %ctlz
+; CHECK-LABEL: @ctlz_make_undef(
+; CHECK-NEXT: %or = or i32 %a, 8
+; CHECK-NEXT: %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+; CHECK-NEXT: ret i32 %ctlz
+}
+
+define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_make_undef_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %ctlz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %or, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define i32 @cttz_undef(i32 %Value) nounwind {
+; CHECK-LABEL: @cttz_undef(
+; CHECK-NEXT: ret i32 undef
+;
+ %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
+ ret i32 %cttz
+}
+
+define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_undef_vec(
+; CHECK-NEXT: ret <2 x i32> undef
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true)
+ ret <2 x i32> %cttz
+}
+
+define i32 @cttz_make_undef(i32 %a) {
+ %or = or i32 %a, 8
+ %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 false)
+ ret i32 %cttz
+; CHECK-LABEL: @cttz_make_undef(
+; CHECK-NEXT: %or = or i32 %a, 8
+; CHECK-NEXT: %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 true)
+; CHECK-NEXT: ret i32 %cttz
+}
+
+define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_make_undef_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT: [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %or = or <2 x i32> %a, <i32 8, i32 8>
+ %cttz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 false)
+ ret <2 x i32> %cttz
+}
+
+define i32 @ctlz_select(i32 %Value) nounwind {
+; CHECK-LABEL: @ctlz_select(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tobool = icmp ne i32 %Value, 0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
+ %s = select i1 %tobool, i32 %ctlz, i32 32
+ ret i32 %s
+}
+
+define <2 x i32> @ctlz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @ctlz_select_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %Value, i1 true)
+ %s = select <2 x i1> %tobool, <2 x i32> %ctlz, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %s
+}
+
+define i32 @cttz_select(i32 %Value) nounwind {
+; CHECK-LABEL: @cttz_select(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tobool = icmp ne i32 %Value, 0
+ %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
+ %s = select i1 %tobool, i32 %cttz, i32 32
+ ret i32 %s
+}
+
+define <2 x i32> @cttz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_select_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %Value, i1 true)
+ %s = select <2 x i1> %tobool, <2 x i32> %cttz, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %s
+}
+
+define void @cos(double *%P) {
+; CHECK-LABEL: @cos(
+; CHECK-NEXT: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.cos.f64(double 0.0) nounwind
+ store volatile double %B, double* %P
+
+ ret void
+}
+
+define void @sin(double *%P) {
+; CHECK-LABEL: @sin(
+; CHECK-NEXT: store volatile double 0.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.sin.f64(double 0.0) nounwind
+ store volatile double %B, double* %P
+
+ ret void
+}
+
+define void @floor(double *%P) {
+; CHECK-LABEL: @floor(
+; CHECK-NEXT: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK-NEXT: store volatile double -2.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.floor.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.floor.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+}
+
+define void @ceil(double *%P) {
+; CHECK-LABEL: @ceil(
+; CHECK-NEXT: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK-NEXT: store volatile double -1.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.ceil.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.ceil.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+}
+
+define void @trunc(double *%P) {
+; CHECK-LABEL: @trunc(
+; CHECK-NEXT: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK-NEXT: store volatile double -1.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.trunc.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.trunc.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+}
+
+define void @rint(double *%P) {
+; CHECK-LABEL: @rint(
+; CHECK-NEXT: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK-NEXT: store volatile double -2.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.rint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.rint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+}
+
+define void @nearbyint(double *%P) {
+; CHECK-LABEL: @nearbyint(
+; CHECK-NEXT: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK-NEXT: store volatile double -2.000000e+00, double* %P, align 8
+; CHECK-NEXT: ret void
+;
+ %B = tail call double @llvm.nearbyint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.nearbyint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+}
+
+; CHECK: [[$CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[$CTLZ_RANGE]] = !{i8 0, i8 3}
diff --git a/llvm/test/Transforms/InstCombine/invariant.group.ll b/llvm/test/Transforms/InstCombine/invariant.group.ll
new file mode 100644
index 00000000000..6b79ceb5b2d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/invariant.group.ll
@@ -0,0 +1,150 @@
+; RUN: opt -instcombine -early-cse -S < %s | FileCheck %s
+
+
+; CHECK-LABEL: define i8* @simplifyNullLaunder()
+define i8* @simplifyNullLaunder() {
+; CHECK-NEXT: ret i8* null
+ %b2 = call i8* @llvm.launder.invariant.group.p0i8(i8* null)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8* @dontSimplifyNullLaunderNoNullOpt()
+define i8* @dontSimplifyNullLaunderNoNullOpt() #0 {
+; CHECK-NEXT: call i8* @llvm.launder.invariant.group.p0i8(i8* null)
+ %b2 = call i8* @llvm.launder.invariant.group.p0i8(i8* null)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8 addrspace(42)* @dontsimplifyNullLaunderForDifferentAddrspace()
+define i8 addrspace(42)* @dontsimplifyNullLaunderForDifferentAddrspace() {
+; CHECK: %b2 = call i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)* null)
+; CHECK: ret i8 addrspace(42)* %b2
+ %b2 = call i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)* null)
+ ret i8 addrspace(42)* %b2
+}
+
+; CHECK-LABEL: define i8* @simplifyUndefLaunder()
+define i8* @simplifyUndefLaunder() {
+; CHECK-NEXT: ret i8* undef
+ %b2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8 addrspace(42)* @simplifyUndefLaunder2()
+define i8 addrspace(42)* @simplifyUndefLaunder2() {
+; CHECK-NEXT: ret i8 addrspace(42)* undef
+ %b2 = call i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)* undef)
+ ret i8 addrspace(42)* %b2
+}
+
+; CHECK-LABEL: define i8* @simplifyNullStrip()
+define i8* @simplifyNullStrip() {
+; CHECK-NEXT: ret i8* null
+ %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* null)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8* @dontSimplifyNullStripNonNullOpt()
+define i8* @dontSimplifyNullStripNonNullOpt() #0 {
+; CHECK-NEXT: call i8* @llvm.strip.invariant.group.p0i8(i8* null)
+ %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* null)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8 addrspace(42)* @dontsimplifyNullStripForDifferentAddrspace()
+define i8 addrspace(42)* @dontsimplifyNullStripForDifferentAddrspace() {
+; CHECK: %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* null)
+; CHECK: ret i8 addrspace(42)* %b2
+ %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* null)
+ ret i8 addrspace(42)* %b2
+}
+
+; CHECK-LABEL: define i8* @simplifyUndefStrip()
+define i8* @simplifyUndefStrip() {
+; CHECK-NEXT: ret i8* undef
+ %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+ ret i8* %b2
+}
+
+; CHECK-LABEL: define i8 addrspace(42)* @simplifyUndefStrip2()
+define i8 addrspace(42)* @simplifyUndefStrip2() {
+; CHECK-NEXT: ret i8 addrspace(42)* undef
+ %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* undef)
+ ret i8 addrspace(42)* %b2
+}
+
+; CHECK-LABEL: define i8* @simplifyLaunderOfLaunder(
+define i8* @simplifyLaunderOfLaunder(i8* %a) {
+; CHECK: call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+; CHECK-NOT: llvm.launder.invariant.group
+ %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+ %a3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a2)
+ ret i8* %a3
+}
+
+; CHECK-LABEL: define i8* @simplifyStripOfLaunder(
+define i8* @simplifyStripOfLaunder(i8* %a) {
+; CHECK-NOT: llvm.launder.invariant.group
+; CHECK: call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+ %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+ %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a2)
+ ret i8* %a3
+}
+
+; CHECK-LABEL: define i1 @simplifyForCompare(
+define i1 @simplifyForCompare(i8* %a) {
+ %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+
+ %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a2)
+ %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+ %c = icmp eq i8* %a3, %b2
+; CHECK: ret i1 true
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i16* @skipWithDifferentTypes(
+define i16* @skipWithDifferentTypes(i8* %a) {
+ %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a)
+ %c1 = bitcast i8* %a2 to i16*
+
+ ; CHECK: %[[b:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+ %a3 = call i16* @llvm.strip.invariant.group.p0i16(i16* %c1)
+ ; CHECK-NEXT: %[[r:.*]] = bitcast i8* %[[b]] to i16*
+ ; CHECK-NEXT: ret i16* %[[r]]
+ ret i16* %a3
+}
+
+; CHECK-LABEL: define i16 addrspace(42)* @skipWithDifferentTypesAddrspace(
+define i16 addrspace(42)* @skipWithDifferentTypesAddrspace(i8 addrspace(42)* %a) {
+ %a2 = call i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)* %a)
+ %c1 = bitcast i8 addrspace(42)* %a2 to i16 addrspace(42)*
+
+ ; CHECK: %[[b:.*]] = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* %a)
+ %a3 = call i16 addrspace(42)* @llvm.strip.invariant.group.p42i16(i16 addrspace(42)* %c1)
+ ; CHECK-NEXT: %[[r:.*]] = bitcast i8 addrspace(42)* %[[b]] to i16 addrspace(42)*
+ ; CHECK-NEXT: ret i16 addrspace(42)* %[[r]]
+ ret i16 addrspace(42)* %a3
+}
+
+; CHECK-LABEL: define i16 addrspace(42)* @skipWithDifferentTypesDifferentAddrspace(
+define i16 addrspace(42)* @skipWithDifferentTypesDifferentAddrspace(i8* %a) {
+ %cast = addrspacecast i8* %a to i8 addrspace(42)*
+ %a2 = call i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)* %cast)
+ %c1 = bitcast i8 addrspace(42)* %a2 to i16 addrspace(42)*
+
+ ; CHECK: %[[b:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
+ %a3 = call i16 addrspace(42)* @llvm.strip.invariant.group.p42i16(i16 addrspace(42)* %c1)
+ ; CHECK-NEXT: %[[r:.*]] = bitcast i8* %[[b]] to i16*
+ ; CHECK-NEXT: %[[r2:.*]] = addrspacecast i16* %[[r]] to i16 addrspace(42)*
+ ; CHECK-NEXT: ret i16 addrspace(42)* %[[r2]]
+ ret i16 addrspace(42)* %a3
+}
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+declare i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)*)
+declare i16* @llvm.strip.invariant.group.p0i16(i16* %c1)
+declare i16 addrspace(42)* @llvm.strip.invariant.group.p42i16(i16 addrspace(42)* %c1)
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/invariant.ll b/llvm/test/Transforms/InstCombine/invariant.ll
new file mode 100644
index 00000000000..21e5f0fe858
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/invariant.ll
@@ -0,0 +1,28 @@
+; Test to make sure unused llvm.invariant.start calls are not trivially eliminated
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @g(i8*)
+declare void @g_addr1(i8 addrspace(1)*)
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+declare {}* @llvm.invariant.start.p1i8(i64, i8 addrspace(1)* nocapture) nounwind readonly
+
+define i8 @f() {
+ %a = alloca i8 ; <i8*> [#uses=4]
+ store i8 0, i8* %a
+ %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) ; <{}*> [#uses=0]
+ ; CHECK: call {}* @llvm.invariant.start.p0i8
+ call void @g(i8* %a)
+ %r = load i8, i8* %a ; <i8> [#uses=1]
+ ret i8 %r
+}
+
+; Make sure llvm.invariant.start calls in a non-default addrspace are also not eliminated.
+define i8 @f_addrspace1(i8 addrspace(1)* %a) {
+ store i8 0, i8 addrspace(1)* %a
+ %i = call {}* @llvm.invariant.start.p1i8(i64 1, i8 addrspace(1)* %a) ; <{}*> [#uses=0]
+ ; CHECK: call {}* @llvm.invariant.start.p1i8
+ call void @g_addr1(i8 addrspace(1)* %a)
+ %r = load i8, i8 addrspace(1)* %a ; <i8> [#uses=1]
+ ret i8 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll
new file mode 100644
index 00000000000..b7cf96d1f22
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-scalar.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge, in the form of: (M is not constant)
+; ((x ^ y) & ~M) ^ y
+; We can de-invert the M:
+; ((x ^ y) & M) ^ x
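+;
+; Illustrative sketch (not part of the checked IR): for 4-bit values
+; x = 0b1010, y = 0b0101, M = 0b1100, both forms give the same result:
+;   ((x ^ y) & ~M) ^ y = (0b1111 & 0b0011) ^ 0b0101 = 0b0110
+;   ((x ^ y) &  M) ^ x = (0b1111 & 0b1100) ^ 0b1010 = 0b0110
+; Each form takes x's bits where M is 0 and y's bits where M is 1, so the
+; mask inversion can be dropped by xor'ing with x instead of y.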
+
+define i4 @scalar (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @scalar(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Various cases with %x and/or %y being a constant
+; ============================================================================ ;
+
+define i4 @in_constant_varx_mone_invmask(i4 %x, i4 %mask) {
+; CHECK-LABEL: @in_constant_varx_mone_invmask(
+; CHECK-NEXT: [[N1_DEMORGAN:%.*]] = or i4 [[X:%.*]], [[MASK:%.*]]
+; CHECK-NEXT: ret i4 [[N1_DEMORGAN]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 %x, -1 ; %x
+ %n1 = and i4 %n0, %notmask
+ %r = xor i4 %n1, -1
+ ret i4 %r
+}
+
+define i4 @in_constant_varx_6_invmask(i4 %x, i4 %mask) {
+; CHECK-LABEL: @in_constant_varx_6_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 %x, 6 ; %x
+ %n1 = and i4 %n0, %notmask
+ %r = xor i4 %n1, 6
+ ret i4 %r
+}
+
+define i4 @in_constant_mone_vary_invmask(i4 %y, i4 %mask) {
+; CHECK-LABEL: @in_constant_mone_vary_invmask(
+; CHECK-NEXT: [[N1_DEMORGAN:%.*]] = or i4 [[Y:%.*]], [[MASK:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = xor i4 [[N1_DEMORGAN]], -1
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 -1, %y ; %x
+ %n1 = and i4 %n0, %notmask
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+define i4 @in_constant_6_vary_invmask(i4 %y, i4 %mask) {
+; CHECK-LABEL: @in_constant_6_vary_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], 6
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 %y, 6 ; %x
+ %n1 = and i4 %n0, %notmask
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Commutativity
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i4 @gen4()
+
+; FIXME: should the %n1 = and i4 %im, %n0 swapped order pattern be tested?
+
+define i4 @c_1_0_0 (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+define i4 @c_0_1_0 (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %x ; %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_0_0_1 (i4 %m) {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %x = call i4 @gen4()
+ %y = call i4 @gen4()
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %y, %n1 ; swapped order
+ ret i4 %r
+}
+
+define i4 @c_1_1_0 (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %x ; %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_1_0_1 (i4 %x, i4 %m) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %y = call i4 @gen4()
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %y, %n1 ; swapped order
+ ret i4 %r
+}
+
+define i4 @c_0_1_1 (i4 %y, i4 %m) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %x = call i4 @gen4()
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %x, %n1 ; swapped order, %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_1_1_1 (i4 %m) {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %x = call i4 @gen4()
+ %y = call i4 @gen4()
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %x, %n1 ; swapped order, %x instead of %y
+ ret i4 %r
+}
+
+define i4 @commutativity_constant_varx_6_invmask(i4 %x, i4 %mask) {
+; CHECK-LABEL: @commutativity_constant_varx_6_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 %x, 6 ; %x
+ %n1 = and i4 %notmask, %n0 ; swapped
+ %r = xor i4 %n1, 6
+ ret i4 %r
+}
+
+define i4 @commutativity_constant_6_vary_invmask(i4 %y, i4 %mask) {
+; CHECK-LABEL: @commutativity_constant_6_vary_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[Y:%.*]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], 6
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %notmask = xor i4 %mask, -1
+ %n0 = xor i4 %y, 6 ; %x
+ %n1 = and i4 %notmask, %n0 ; swapped
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use4(i4)
+
+define i4 @n_oneuse_D_is_ok (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_oneuse_D_is_ok(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[TMP1]], [[X]]
+; CHECK-NEXT: call void @use4(i4 [[N0]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y ; two uses of %n0, THIS IS OK!
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n0)
+ ret i4 %r
+}
+
+define i4 @n_oneuse_A (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_oneuse_A(
+; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(i4 [[N1]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im ; two uses of %n1, which is going to be replaced
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n1)
+ ret i4 %r
+}
+
+define i4 @n_oneuse_AD (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_oneuse_AD(
+; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(i4 [[N0]])
+; CHECK-NEXT: call void @use4(i4 [[N1]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im ; two uses of %n1, which is going to be replaced
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n0)
+ call void @use4(i4 %n1)
+ ret i4 %r
+}
+
+; Some third variable is used
+
+define i4 @n_third_var (i4 %x, i4 %y, i4 %z, i4 %m) {
+; CHECK-LABEL: @n_third_var(
+; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], -1
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Z:%.*]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %z ; not %x or %y
+ ret i4 %r
+}
+
+define i4 @n_badxor (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_badxor(
+; CHECK-NEXT: [[IM:%.*]] = xor i4 [[M:%.*]], 1
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %im = xor i4 %m, 1 ; not -1
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %im
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll
new file mode 100644
index 00000000000..a2e427b0c46
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll
@@ -0,0 +1,421 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge, in the form of: (M is not constant)
+; ((x ^ y) & ~M) ^ y
+; We can de-invert the M:
+; ((x ^ y) & M) ^ x
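+;
+; Illustrative per-lane sketch (not part of the checked IR): with lanes
+; x = <0b1010, 0b0001>, y = <0b0101, 0b0010>, M = <0b1100, 0b0011>, both
+; ((x ^ y) & ~M) ^ y and ((x ^ y) & M) ^ x evaluate to <0b0110, 0b0010>,
+; since the identity holds independently in every element.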
+
+define <2 x i4> @vector (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @vector(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @vector_undef (<3 x i4> %x, <3 x i4> %y, <3 x i4> %m) {
+; CHECK-LABEL: @vector_undef(
+; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %im = xor <3 x i4> %m, <i4 -1, i4 undef, i4 -1>
+ %n0 = xor <3 x i4> %x, %y
+ %n1 = and <3 x i4> %n0, %im
+ %r = xor <3 x i4> %n1, %y
+ ret <3 x i4> %r
+}
+
+; ============================================================================ ;
+; Various cases with %x and/or %y being a constant
+; ============================================================================ ;
+
+define <2 x i4> @in_constant_varx_mone_invmask(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_mone_invmask(
+; CHECK-NEXT: [[N1_DEMORGAN:%.*]] = or <2 x i4> [[X:%.*]], [[MASK:%.*]]
+; CHECK-NEXT: ret <2 x i4> [[N1_DEMORGAN]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, <i4 -1, i4 -1> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, <i4 -1, i4 -1>
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_6_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], <i4 6, i4 6>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, <i4 6, i4 6> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, <i4 6, i4 6>
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_varx_6_invmask_nonsplat(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_6_invmask_nonsplat(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], <i4 6, i4 7>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, <i4 6, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, <i4 6, i4 7>
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @in_constant_varx_6_invmask_undef(<3 x i4> %x, <3 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_6_invmask_undef(
+; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], <i4 6, i4 undef, i4 7>
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %notmask = xor <3 x i4> %mask, <i4 -1, i4 undef, i4 -1>
+ %n0 = xor <3 x i4> %x, <i4 6, i4 undef, i4 7> ; %x
+ %n1 = and <3 x i4> %n0, %notmask
+ %r = xor <3 x i4> %n1, <i4 6, i4 undef, i4 7>
+ ret <3 x i4> %r
+}
+
+define <2 x i4> @in_constant_mone_vary_invmask(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_mone_vary_invmask(
+; CHECK-NEXT: [[N1_DEMORGAN:%.*]] = or <2 x i4> [[Y:%.*]], [[MASK:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = xor <2 x i4> [[N1_DEMORGAN]], <i4 -1, i4 -1>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> <i4 -1, i4 -1>, %y ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_6_vary_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], <i4 6, i4 6>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], <i4 6, i4 6>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %y, <i4 6, i4 6> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_6_vary_invmask_nonsplat(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_6_vary_invmask_nonsplat(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], <i4 6, i4 7>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], <i4 6, i4 7>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %y, <i4 6, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @in_constant_6_vary_invmask_undef(<3 x i4> %y, <3 x i4> %mask) {
+; CHECK-LABEL: @in_constant_6_vary_invmask_undef(
+; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[Y:%.*]], <i4 6, i4 undef, i4 6>
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], <i4 6, i4 undef, i4 6>
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %notmask = xor <3 x i4> %mask, <i4 -1, i4 undef, i4 -1>
+ %n0 = xor <3 x i4> %y, <i4 6, i4 undef, i4 6> ; %x
+ %n1 = and <3 x i4> %n0, %notmask
+ %r = xor <3 x i4> %n1, %y
+ ret <3 x i4> %r
+}
+
+; ============================================================================ ;
+; Commutativity
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare <2 x i4> @gen4()
+
+; FIXME: should %n1 = and <2 x i4> %im, %n0 swapped order pattern be tested?
+
+define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %x ; %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_0_1 (<2 x i4> %m) {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %x = call <2 x i4> @gen4()
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %y, %n1 ; swapped order
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %x ; %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_0_1 (<2 x i4> %x, <2 x i4> %m) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %y, %n1 ; swapped order
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_1 (<2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %x = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_1 (<2 x i4> %m) {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %x = call <2 x i4> @gen4()
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @commutativity_constant_varx_6_invmask(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @commutativity_constant_varx_6_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], <i4 6, i4 6>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, <i4 6, i4 6> ; %x
+ %n1 = and <2 x i4> %notmask, %n0 ; swapped
+ %r = xor <2 x i4> %n1, <i4 6, i4 6>
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @commutativity_constant_6_vary_invmask(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @commutativity_constant_6_vary_invmask(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[Y:%.*]], <i4 6, i4 6>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[MASK:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], <i4 6, i4 6>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %y, <i4 6, i4 6> ; %x
+ %n1 = and <2 x i4> %notmask, %n0 ; swapped
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use4(<2 x i4>)
+
+define <2 x i4> @n_oneuse_D_is_ok (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_oneuse_D_is_ok(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[TMP1]], [[X]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N0]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y ; two uses of %n0, THIS IS OK!
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n0)
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_oneuse_A(
+; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], <i4 -1, i4 -1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N1]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im ; two uses of %n1, which is going to be replaced
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n1)
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_oneuse_AD(
+; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], <i4 -1, i4 -1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N0]])
+; CHECK-NEXT: call void @use4(<2 x i4> [[N1]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y ; two uses of %n0 IS OK
+ %n1 = and <2 x i4> %n0, %im ; two uses of %n1, which is going to be replaced
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n0)
+ call void @use4(<2 x i4> %n1)
+ ret <2 x i4> %r
+}
+
+; Some third variable is used
+
+define <2 x i4> @n_third_var (<2 x i4> %x, <2 x i4> %y, <2 x i4> %z, <2 x i4> %m) {
+; CHECK-LABEL: @n_third_var(
+; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], <i4 -1, i4 -1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %z ; not %x or %y
+ ret <2 x i4> %r
+}
+
+
+define <2 x i4> @n_third_var_const(<2 x i4> %x, <2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @n_third_var_const(
+; CHECK-NEXT: [[NOTMASK:%.*]] = xor <2 x i4> [[MASK:%.*]], <i4 -1, i4 -1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], <i4 6, i4 7>
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[NOTMASK]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], <i4 7, i4 6>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %notmask = xor <2 x i4> %mask, <i4 -1, i4 -1>
+ %n0 = xor <2 x i4> %x, <i4 6, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, %notmask
+ %r = xor <2 x i4> %n1, <i4 7, i4 6>
+ ret <2 x i4> %r
+}
+
+; Bad xor
+
+define <2 x i4> @n_badxor_splat (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_badxor_splat(
+; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 1, i4 1> ; not -1
+ %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @n_badxor (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_badxor(
+; CHECK-NEXT: [[IM:%.*]] = xor <2 x i4> [[M:%.*]], <i4 -1, i4 1>
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[IM]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %im = xor <2 x i4> %m, <i4 -1, i4 1> ; not -1
+ %n0 = xor <2 x i4> %x, %y
+  %n1 = and <2 x i4> %n0, %im
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/invoke.ll b/llvm/test/Transforms/InstCombine/invoke.ll
new file mode 100644
index 00000000000..deb4a2b87a4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/invoke.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+declare i8* @_Znwm(i64)
+
+
+; CHECK-LABEL: @f1(
+define i64 @f1() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK: invoke noalias i8* undef()
+ %call = invoke noalias i8* undef()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 0
+ %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+ ret i64 %0
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %2 = extractvalue { i8*, i32 } %1, 0
+ tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+ unreachable
+}
+
+; CHECK-LABEL: @f2(
+define i64 @f2() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK: invoke noalias i8* null()
+ %call = invoke noalias i8* null()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 0
+ %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+ ret i64 %0
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %2 = extractvalue { i8*, i32 } %1, 0
+ tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+ unreachable
+}
+
+; CHECK-LABEL: @f2_no_null_opt(
+define i64 @f2_no_null_opt() nounwind uwtable ssp #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK: invoke noalias i8* null()
+ %call = invoke noalias i8* null()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %call, i1 false, i1 false, i1 false)
+ %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+ ret i64 %0
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %2 = extractvalue { i8*, i32 } %1, 0
+ tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+ unreachable
+}
+attributes #0 = { "null-pointer-is-valid"="true" }
+
+; CHECK-LABEL: @f3(
+define void @f3() nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: invoke void @llvm.donothing()
+ %call = invoke noalias i8* @_Znwm(i64 13)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret void
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %2 = extractvalue { i8*, i32 } %1, 0
+ tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/isascii-1.ll b/llvm/test/Transforms/InstCombine/isascii-1.ll
new file mode 100644
index 00000000000..88f5ad66d2e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/isascii-1.ll
@@ -0,0 +1,32 @@
+; Test that the isascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isascii(i32)
+
+; Check isascii(c) -> c <u 128.
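+; For example (illustrative only): isascii(65) folds to 1 since 65 u< 128, and
+; isascii(200) folds to 0 since 200 u>= 128; test_simplify3 below checks the
+; "icmp ult i32 %x, 128" plus zext form for a non-constant argument.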
+
+define i32 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %ret = call i32 @isascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %ret = call i32 @isascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3(i32 %x) {
+; CHECK-LABEL: @test_simplify3(
+ %ret = call i32 @isascii(i32 %x)
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 128
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/llvm/test/Transforms/InstCombine/isdigit-1.ll b/llvm/test/Transforms/InstCombine/isdigit-1.ll
new file mode 100644
index 00000000000..6791307aeae
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/isdigit-1.ll
@@ -0,0 +1,48 @@
+; Test that the isdigit library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isdigit(i32)
+
+; Check isdigit(c) -> (c - '0') <u 10.
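+; For example (illustrative only): isdigit(48) folds to 1 since 48 - 48 = 0 u< 10,
+; while isdigit(58) folds to 0 since 58 - 48 = 10 is not u< 10; test_simplify5
+; below checks the "add -48, icmp ult 10, zext" form for a non-constant argument.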
+
+define i32 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %ret = call i32 @isdigit(i32 47)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %ret = call i32 @isdigit(i32 48)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %ret = call i32 @isdigit(i32 57)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %ret = call i32 @isdigit(i32 58)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5(i32 %x) {
+; CHECK-LABEL: @test_simplify5(
+
+ %ret = call i32 @isdigit(i32 %x)
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add i32 %x, -48
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 [[ADD]], 10
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/llvm/test/Transforms/InstCombine/known-never-nan.ll b/llvm/test/Transforms/InstCombine/known-never-nan.ll
new file mode 100644
index 00000000000..23a0780fe43
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/known-never-nan.ll
@@ -0,0 +1,196 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+; This file used to contain more tests that folded to true/false,
+; but those are all tested identically in InstSimplify now.
+; If any remaining tests are made to return true/false, that
+; functionality/testing may be better housed in InstSimplify
+; rather than InstCombine.
+
+define i1 @fabs_sqrt_src_maybe_nan(double %arg0, double %arg1) {
+; CHECK-LABEL: @fabs_sqrt_src_maybe_nan(
+; CHECK-NEXT: [[FABS:%.*]] = call double @llvm.fabs.f64(double [[ARG0:%.*]])
+; CHECK-NEXT: [[OP:%.*]] = call double @llvm.sqrt.f64(double [[FABS]])
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %fabs = call double @llvm.fabs.f64(double %arg0)
+ %op = call double @llvm.sqrt.f64(double %fabs)
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @select_maybe_nan_lhs(i1 %cond, double %lhs, double %arg1) {
+; CHECK-LABEL: @select_maybe_nan_lhs(
+; CHECK-NEXT: [[RHS:%.*]] = fadd nnan double [[ARG1:%.*]], 1.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], double [[LHS:%.*]], double [[RHS]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %rhs = fadd nnan double %arg1, 1.0
+ %op = select i1 %cond, double %lhs, double %rhs
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @select_maybe_nan_rhs(i1 %cond, double %arg0, double %rhs) {
+; CHECK-LABEL: @select_maybe_nan_rhs(
+; CHECK-NEXT: [[LHS:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], double [[LHS]], double [[RHS:%.*]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %lhs = fadd nnan double %arg0, 1.0
+ %op = select i1 %cond, double %lhs, double %rhs
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fadd(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fadd(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG0]], 2.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fadd double [[NNAN_ARG0]], [[NNAN_ARG1]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %nnan.arg1 = fadd nnan double %arg0, 2.0
+ %op = fadd double %nnan.arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fadd_maybe_nan_lhs(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fadd_maybe_nan_lhs(
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG1:%.*]], 1.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fadd double [[NNAN_ARG1]], [[ARG0:%.*]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg1 = fadd nnan double %arg1, 1.0
+ %op = fadd double %arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fadd_maybe_nan_rhs(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fadd_maybe_nan_rhs(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fadd double [[NNAN_ARG0]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %op = fadd double %nnan.arg0, %arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fmul(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fmul(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG0]], 2.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fmul double [[NNAN_ARG0]], [[NNAN_ARG1]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %nnan.arg1 = fadd nnan double %arg0, 2.0
+ %op = fmul double %nnan.arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fsub(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fsub(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG0]], 2.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fsub double [[NNAN_ARG0]], [[NNAN_ARG1]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %nnan.arg1 = fadd nnan double %arg0, 2.0
+ %op = fsub double %nnan.arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+declare double @func()
+
+define i1 @nnan_fneg() {
+; CHECK-LABEL: @nnan_fneg(
+; CHECK-NEXT: [[NNAN:%.*]] = call nnan double @func()
+; CHECK-NEXT: ret i1 true
+;
+ %nnan = call nnan double @func()
+ %op = fsub double -0.0, %nnan
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @fpext_maybe_nan(float %arg0) {
+; CHECK-LABEL: @fpext_maybe_nan(
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord float [[ARG0:%.*]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %op = fpext float %arg0 to double
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @fptrunc_maybe_nan(double %arg0) {
+; CHECK-LABEL: @fptrunc_maybe_nan(
+; CHECK-NEXT: [[OP:%.*]] = fptrunc double [[ARG0:%.*]] to float
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord float [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %op = fptrunc double %arg0 to float
+ %tmp = fcmp ord float %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_fdiv(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_fdiv(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG0]], 2.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = fdiv double [[NNAN_ARG0]], [[NNAN_ARG1]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %nnan.arg1 = fadd nnan double %arg0, 2.0
+ %op = fdiv double %nnan.arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+define i1 @nnan_frem(double %arg0, double %arg1) {
+; CHECK-LABEL: @nnan_frem(
+; CHECK-NEXT: [[NNAN_ARG0:%.*]] = fadd nnan double [[ARG0:%.*]], 1.000000e+00
+; CHECK-NEXT: [[NNAN_ARG1:%.*]] = fadd nnan double [[ARG0]], 2.000000e+00
+; CHECK-NEXT: [[OP:%.*]] = frem double [[NNAN_ARG0]], [[NNAN_ARG1]]
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double [[OP]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
+ %nnan.arg0 = fadd nnan double %arg0, 1.0
+ %nnan.arg1 = fadd nnan double %arg0, 2.0
+ %op = frem double %nnan.arg0, %nnan.arg1
+ %tmp = fcmp ord double %op, %op
+ ret i1 %tmp
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.fabs.f64(double)
+declare double @llvm.canonicalize.f64(double)
+declare double @llvm.copysign.f64(double, double)
+declare double @llvm.exp.f64(double)
+declare double @llvm.exp2.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.rint.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.round.f64(double)
+
diff --git a/llvm/test/Transforms/InstCombine/known_align.ll b/llvm/test/Transforms/InstCombine/known_align.ll
new file mode 100644
index 00000000000..653c4c51e2d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/known_align.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S | grep "align 1"
+; END.
+
+ %struct.p = type <{ i8, i32 }>
+@t = global %struct.p <{ i8 1, i32 10 }> ; <%struct.p*> [#uses=1]
+@u = weak global %struct.p zeroinitializer ; <%struct.p*> [#uses=1]
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %tmp = alloca i32, align 4 ; <i32*> [#uses=2]
+ %tmp1 = alloca i32, align 4 ; <i32*> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %tmp3 = load i32, i32* getelementptr (%struct.p, %struct.p* @t, i32 0, i32 1), align 1 ; <i32> [#uses=1]
+ store i32 %tmp3, i32* %tmp1, align 4
+ %tmp5 = load i32, i32* %tmp1, align 4 ; <i32> [#uses=1]
+ store i32 %tmp5, i32* getelementptr (%struct.p, %struct.p* @u, i32 0, i32 1), align 1
+ %tmp6 = load i32, i32* %tmp1, align 4 ; <i32> [#uses=1]
+ store i32 %tmp6, i32* %tmp, align 4
+ %tmp7 = load i32, i32* %tmp, align 4 ; <i32> [#uses=1]
+ store i32 %tmp7, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %entry
+ %retval8 = load i32, i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval8
+}
diff --git a/llvm/test/Transforms/InstCombine/lifetime-asan.ll b/llvm/test/Transforms/InstCombine/lifetime-asan.ll
new file mode 100644
index 00000000000..e7b996def82
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lifetime-asan.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @foo(i8* nocapture)
+
+define void @asan() sanitize_address {
+entry:
+ ; CHECK-LABEL: @asan(
+ %text = alloca i8, align 1
+
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
+ ; CHECK: call void @llvm.lifetime.start
+ ; CHECK-NEXT: call void @llvm.lifetime.end
+
+ call void @foo(i8* %text) ; Keep alloca alive
+
+ ret void
+}
+
+define void @hwasan() sanitize_hwaddress {
+entry:
+ ; CHECK-LABEL: @hwasan(
+ %text = alloca i8, align 1
+
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
+ ; CHECK: call void @llvm.lifetime.start
+ ; CHECK-NEXT: call void @llvm.lifetime.end
+
+ call void @foo(i8* %text) ; Keep alloca alive
+
+ ret void
+}
+
+define void @no_asan() {
+entry:
+ ; CHECK-LABEL: @no_asan(
+ %text = alloca i8, align 1
+
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
+  ; CHECK-NOT: call void @llvm.lifetime
+
+ call void @foo(i8* %text) ; Keep alloca alive
+
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
new file mode 100644
index 00000000000..ee3668b3d80
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @foo(i8* nocapture, i8* nocapture)
+
+define void @bar(i1 %flag) #0 !dbg !4 {
+entry:
+; CHECK-LABEL: @bar(
+; CHECK: %[[T:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %text
+; CHECK: %[[B:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %buff
+; CHECK: if:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb3:
+; CHECK-NEXT: call void @llvm.dbg.declare
+; CHECK-NEXT: br label %fin
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @foo(i8* %[[B]], i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* %[[T]])
+ %text = alloca [1 x i8], align 1
+ %buff = alloca [1 x i8], align 1
+ %0 = getelementptr inbounds [1 x i8], [1 x i8]* %text, i64 0, i64 0
+ %1 = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i64 0, i64 0
+ br i1 %flag, label %if, label %else
+
+if:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %bb2
+
+bb2:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ br label %bb3
+
+bb3:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.dbg.declare(metadata [1 x i8]* %text, metadata !14, metadata !25), !dbg !26
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %fin
+
+else:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @foo(i8* %1, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %fin
+
+fin:
+ ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/user")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!8 = !{!9, !11, !12, !14, !21}
+!9 = !DILocalVariable(name: "Size", arg: 1, scope: !4, file: !1, line: 2, type: !10)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "flag", arg: 2, scope: !4, file: !1, line: 2, type: !7)
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 3, type: !10)
+!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 3)
+!14 = !DILocalVariable(name: "text", scope: !15, file: !1, line: 4, type: !17)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 3, column: 30)
+!16 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
+!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 8, align: 8, elements: !19)
+!18 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!19 = !{!20}
+!20 = !DISubrange(count: 1)
+!21 = !DILocalVariable(name: "buff", scope: !15, file: !1, line: 5, type: !17)
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)"}
+!25 = !DIExpression()
+!26 = !DILocation(line: 4, column: 10, scope: !15)
diff --git a/llvm/test/Transforms/InstCombine/lifetime.ll b/llvm/test/Transforms/InstCombine/lifetime.ll
new file mode 100644
index 00000000000..8eb6646c841
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lifetime.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @foo(i8* nocapture, i8* nocapture)
+
+define void @bar(i1 %flag) !dbg !4 {
+entry:
+; CHECK-LABEL: @bar(
+; CHECK: %[[T:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %text
+; CHECK: %[[B:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %buff
+; CHECK: if:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb3:
+; CHECK-NEXT: call void @llvm.dbg.declare
+; CHECK-NEXT: br label %fin
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %[[B]])
+; CHECK-NEXT: call void @foo(i8* nonnull %[[B]], i8* nonnull %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %[[B]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %[[T]])
+ %text = alloca [1 x i8], align 1
+ %buff = alloca [1 x i8], align 1
+ %0 = getelementptr inbounds [1 x i8], [1 x i8]* %text, i64 0, i64 0
+ %1 = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i64 0, i64 0
+ br i1 %flag, label %if, label %else
+
+if:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %bb2
+
+bb2:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ br label %bb3
+
+bb3:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.dbg.declare(metadata [1 x i8]* %text, metadata !14, metadata !25), !dbg !26
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %fin
+
+else:
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+ call void @foo(i8* %1, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 1, i8* %0)
+ br label %fin
+
+fin:
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/user")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!8 = !{!9, !11, !12, !14, !21}
+!9 = !DILocalVariable(name: "Size", arg: 1, scope: !4, file: !1, line: 2, type: !10)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "flag", arg: 2, scope: !4, file: !1, line: 2, type: !7)
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 3, type: !10)
+!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 3)
+!14 = !DILocalVariable(name: "text", scope: !15, file: !1, line: 4, type: !17)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 3, column: 30)
+!16 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
+!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 8, align: 8, elements: !19)
+!18 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!19 = !{!20}
+!20 = !DISubrange(count: 1)
+!21 = !DILocalVariable(name: "buff", scope: !15, file: !1, line: 5, type: !17)
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)"}
+!25 = !DIExpression()
+!26 = !DILocation(line: 4, column: 10, scope: !15)
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
new file mode 100644
index 00000000000..09b0f0d35f1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -data-layout="e-m:e-i64:64-f80:128-n8:16:32:64-S128" | FileCheck %s
+
+@a = global [1000 x float] zeroinitializer, align 16
+@b = global [1000 x float] zeroinitializer, align 16
+
+define void @_Z3foov() {
+; CHECK-LABEL: @_Z3foov(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 1000
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I_0]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x float], [1000 x float]* @a, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [1000 x float], [1000 x float]* @b, i64 0, i64 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[CMP_I]], float [[TMP2]], float [[TMP1]]
+; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ult i32 %i.0, 1000
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond
+ ret void
+
+for.body: ; preds = %for.cond
+ %0 = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds [1000 x float], [1000 x float]* @a, i64 0, i64 %0
+ %arrayidx2 = getelementptr inbounds [1000 x float], [1000 x float]* @b, i64 0, i64 %0
+ %1 = load float, float* %arrayidx, align 4
+ %2 = load float, float* %arrayidx2, align 4
+ %cmp.i = fcmp fast olt float %1, %2
+ %__b.__a.i = select i1 %cmp.i, float* %arrayidx2, float* %arrayidx
+ %3 = bitcast float* %__b.__a.i to i32*
+ %4 = load i32, i32* %3, align 4
+ %5 = bitcast float* %arrayidx to i32*
+ store i32 %4, i32* %5, align 4
+ %inc = add nuw nsw i32 %i.0, 1
+ br label %for.cond
+}
+
+define i32 @store_bitcasted_load(i1 %cond, float* dereferenceable(4) %addr1, float* dereferenceable(4) %addr2) {
+; CHECK-LABEL: @store_bitcasted_load(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float* [[ADDR1:%.*]], float* [[ADDR2:%.*]]
+; CHECK-NEXT: [[BC1:%.*]] = bitcast float* [[SEL]] to i32*
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[BC1]], align 4
+; CHECK-NEXT: ret i32 [[LD]]
+;
+ %sel = select i1 %cond, float* %addr1, float* %addr2
+ %bc1 = bitcast float* %sel to i32*
+ %ld = load i32, i32* %bc1
+ ret i32 %ld
+}
+
+define void @bitcasted_store(i1 %cond, float* %loadaddr1, float* %loadaddr2, float* %storeaddr) {
+; CHECK-LABEL: @bitcasted_store(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float* [[LOADADDR1:%.*]], float* [[LOADADDR2:%.*]]
+; CHECK-NEXT: [[INT_LOAD_ADDR:%.*]] = bitcast float* [[SEL]] to i32*
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[INT_LOAD_ADDR]], align 4
+; CHECK-NEXT: [[INT_STORE_ADDR:%.*]] = bitcast float* [[STOREADDR:%.*]] to i32*
+; CHECK-NEXT: store i32 [[LD]], i32* [[INT_STORE_ADDR]], align 4
+; CHECK-NEXT: ret void
+;
+ %sel = select i1 %cond, float* %loadaddr1, float* %loadaddr2
+ %int_load_addr = bitcast float* %sel to i32*
+ %ld = load i32, i32* %int_load_addr
+ %int_store_addr = bitcast float* %storeaddr to i32*
+ store i32 %ld, i32* %int_store_addr
+ ret void
+}
+
+define void @bitcasted_minmax_with_select_of_pointers(float* %loadaddr1, float* %loadaddr2, float* %storeaddr) {
+; CHECK-LABEL: @bitcasted_minmax_with_select_of_pointers(
+; CHECK-NEXT: [[LD1:%.*]] = load float, float* [[LOADADDR1:%.*]], align 4
+; CHECK-NEXT: [[LD2:%.*]] = load float, float* [[LOADADDR2:%.*]], align 4
+; CHECK-NEXT: [[COND:%.*]] = fcmp ogt float [[LD1]], [[LD2]]
+; CHECK-NEXT: [[LD3:%.*]] = select i1 [[COND]], float [[LD1]], float [[LD2]]
+; CHECK-NEXT: store float [[LD3]], float* [[STOREADDR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %ld1 = load float, float* %loadaddr1, align 4
+ %ld2 = load float, float* %loadaddr2, align 4
+ %cond = fcmp ogt float %ld1, %ld2
+ %sel = select i1 %cond, float* %loadaddr1, float* %loadaddr2
+ %int_load_addr = bitcast float* %sel to i32*
+ %ld = load i32, i32* %int_load_addr, align 4
+ %int_store_addr = bitcast float* %storeaddr to i32*
+ store i32 %ld, i32* %int_store_addr, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast32.ll b/llvm/test/Transforms/InstCombine/load-bitcast32.ll
new file mode 100644
index 00000000000..b1c78a8a314
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-bitcast32.ll
@@ -0,0 +1,79 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "p:32:32:32"
+
+
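+; With 32-bit pointers, a load whose only use is an inttoptr or ptrtoint should
+; be rewritten to load the destination type directly when the sizes match;
+; ptrtoint to a wider integer becomes a 32-bit load plus a zext.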
+define i64* @test1(i8* %x) {
+entry:
+; CHECK-LABEL: @test1(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i64*
+ %b = load i64, i64* %a
+ %c = inttoptr i64 %b to i64*
+
+ ret i64* %c
+}
+
+define i32* @test2(i8* %x) {
+entry:
+; CHECK-LABEL: @test2(
+; CHECK: load i32*, i32**
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i32*
+
+ ret i32* %c
+}
+
+define i64* @test3(i8* %x) {
+entry:
+; CHECK-LABEL: @test3(
+; CHECK: load i64*, i64**
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i64*
+
+ ret i64* %c
+}
+
+define i64 @test4(i8* %x) {
+entry:
+; CHECK-LABEL: @test4(
+; CHECK: load i32, i32*
+; CHECK: zext
+; CHECK: ret
+ %a = bitcast i8* %x to i64**
+ %b = load i64*, i64** %a
+ %c = ptrtoint i64* %b to i64
+
+ ret i64 %c
+}
+
+define i32 @test5(i8* %x) {
+entry:
+; CHECK-LABEL: @test5(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i32
+
+ ret i32 %c
+}
+
+define i64 @test6(i8* %x) {
+entry:
+; CHECK-LABEL: @test6(
+; CHECK: load i32, i32*
+; CHECK: zext
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i64
+
+ ret i64 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast64.ll b/llvm/test/Transforms/InstCombine/load-bitcast64.ll
new file mode 100644
index 00000000000..d14c686d83e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-bitcast64.ll
@@ -0,0 +1,78 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "p:64:64:64"
+
+
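+; Same pattern as load-bitcast32.ll but with 64-bit pointers: i64-sized round
+; trips fold to direct loads of the destination type, and ptrtoint to a
+; narrower i32 becomes a 64-bit load plus a trunc.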
+define i64* @test1(i8* %x) {
+entry:
+; CHECK-LABEL: @test1(
+; CHECK: load i64*, i64**
+; CHECK: ret
+ %a = bitcast i8* %x to i64*
+ %b = load i64, i64* %a
+ %c = inttoptr i64 %b to i64*
+
+ ret i64* %c
+}
+
+define i32* @test2(i8* %x) {
+entry:
+; CHECK-LABEL: @test2(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i32*
+
+ ret i32* %c
+}
+
+define i64* @test3(i8* %x) {
+entry:
+; CHECK-LABEL: @test3(
+; CHECK: load i32, i32*
+; CHECK: ret
+ %a = bitcast i8* %x to i32*
+ %b = load i32, i32* %a
+ %c = inttoptr i32 %b to i64*
+
+ ret i64* %c
+}
+
+define i64 @test4(i8* %x) {
+entry:
+; CHECK-LABEL: @test4(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i64**
+ %b = load i64*, i64** %a
+ %c = ptrtoint i64* %b to i64
+
+ ret i64 %c
+}
+
+define i32 @test5(i8* %x) {
+entry:
+; CHECK-LABEL: @test5(
+; CHECK: load i64, i64*
+; CHECK: trunc
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i32
+
+ ret i32 %c
+}
+
+define i64 @test6(i8* %x) {
+entry:
+; CHECK-LABEL: @test6(
+; CHECK: load i64, i64*
+; CHECK: ret
+ %a = bitcast i8* %x to i32**
+ %b = load i32*, i32** %a
+ %c = ptrtoint i32* %b to i64
+
+ ret i64 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
new file mode 100644
index 00000000000..5746b7aa28d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -0,0 +1,316 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
+
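+; Check folding of compares of values loaded from the constant globals below
+; into compares or bit-tests on the index itself.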
+@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+ i16 73, i16 82, i16 69, i16 68, i16 0]
+
+@G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
+ i16 73, i16 82, i16 69, i16 68, i16 0]
+
+@GD = internal constant [6 x double]
+ [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
+
+%Foo = type { i32, i32, i32, i32 }
+
+@GS = internal constant %Foo { i32 1, i32 4, i32 9, i32 14 }
+
+@GStructArr = internal constant [4 x %Foo] [ %Foo { i32 1, i32 4, i32 9, i32 14 },
+ %Foo { i32 5, i32 4, i32 6, i32 11 },
+ %Foo { i32 6, i32 5, i32 9, i32 20 },
+ %Foo { i32 12, i32 3, i32 9, i32 8 } ]
+
+
+define i1 @test1(i32 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+}
+
+define i1 @test1_noinbounds(i32 %X) {
+; CHECK-LABEL: @test1_noinbounds(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+}
+
+define i1 @test1_noinbounds_i64(i64 %X) {
+; CHECK-LABEL: @test1_noinbounds_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %X to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
+ %Q = load i16, i16* %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
+}
+
+define i1 @test1_noinbounds_as1(i32 %x) {
+; CHECK-LABEL: @test1_noinbounds_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i16
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
+ %q = load i16, i16 addrspace(1)* %p
+ %r = icmp eq i16 %q, 0
+ ret i1 %r
+
+}
+
+define i1 @test2(i32 %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %X, 4
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = icmp slt i16 %Q, 85
+ ret i1 %R
+}
+
+define i1 @test3(i32 %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double, double* %P
+ %R = fcmp oeq double %Q, 1.0
+ ret i1 %R
+
+}
+
+define i1 @test4(i32 %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 933, %X
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = icmp sle i16 %Q, 73
+ ret i1 %R
+}
+
+define i1 @test4_i16(i16 %X) {
+; CHECK-LABEL: @test4_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 %X to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 933, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i16 %X
+ %Q = load i16, i16* %P
+ %R = icmp sle i16 %Q, 73
+ ret i1 %R
+}
+
+define i1 @test5(i32 %X) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %X, 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 %X, 7
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = icmp eq i16 %Q, 69
+ ret i1 %R
+}
+
+define i1 @test6(i32 %X) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double, double* %P
+ %R = fcmp ogt double %Q, 0.0
+ ret i1 %R
+}
+
+define i1 @test7(i32 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[TMP1]], 2
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
+ %Q = load double, double* %P
+ %R = fcmp olt double %Q, 0.0
+ ret i1 %R
+}
+
+define i1 @test8(i32 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %X, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
+ %Q = load i16, i16* %P
+ %R = and i16 %Q, 3
+ %S = icmp eq i16 %R, 0
+ ret i1 %S
+}
+
+@GA = internal constant [4 x { i32, i32 } ] [
+ { i32, i32 } { i32 1, i32 0 },
+ { i32, i32 } { i32 2, i32 1 },
+ { i32, i32 } { i32 3, i32 1 },
+ { i32, i32 } { i32 4, i32 0 }
+]
+
+define i1 @test9(i32 %X) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %P = getelementptr inbounds [4 x { i32, i32 } ], [4 x { i32, i32 } ]* @GA, i32 0, i32 %X, i32 1
+ %Q = load i32, i32* %P
+ %R = icmp eq i32 %Q, 1
+ ret i1 %R
+}
+
+define i1 @test10_struct(i32 %x) {
+; CHECK-LABEL: @test10_struct(
+; CHECK-NEXT: ret i1 false
+;
+ %p = getelementptr inbounds %Foo, %Foo* @GS, i32 %x, i32 0
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds(i32 %x) {
+; CHECK-LABEL: @test10_struct_noinbounds(
+; CHECK-NEXT: [[P:%.*]] = getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
+; CHECK-NEXT: [[Q:%.*]] = load i32, i32* [[P]], align 8
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index < ptr size
+define i1 @test10_struct_i16(i16 %x){
+; CHECK-LABEL: @test10_struct_i16(
+; CHECK-NEXT: ret i1 false
+;
+ %p = getelementptr inbounds %Foo, %Foo* @GS, i16 %x, i32 0
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 0
+ ret i1 %r
+}
+
+; Test that the GEP indices are converted before we ever get here
+; Index > ptr size
+define i1 @test10_struct_i64(i64 %x){
+; CHECK-LABEL: @test10_struct_i64(
+; CHECK-NEXT: ret i1 false
+;
+ %p = getelementptr inbounds %Foo, %Foo* @GS, i64 %x, i32 0
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 0
+ ret i1 %r
+}
+
+define i1 @test10_struct_noinbounds_i16(i16 %x) {
+; CHECK-LABEL: @test10_struct_noinbounds_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 %x to i32
+; CHECK-NEXT: [[P:%.*]] = getelementptr %Foo, %Foo* @GS, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[Q:%.*]] = load i32, i32* [[P]], align 8
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr %Foo, %Foo* @GS, i16 %x, i32 0
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 0
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr(i32 %x) {
+; CHECK-LABEL: @test10_struct_arr(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds(i32 %x) {
+; CHECK-LABEL: @test10_struct_arr_noinbounds(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr_i16(i16 %x) {
+; CHECK-LABEL: @test10_struct_arr_i16(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i16 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr_i64(i64 %x) {
+; CHECK-LABEL: @test10_struct_arr_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %x to i32
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
+; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i16 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
+define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
+; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %x to i32
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
+ %q = load i32, i32* %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata-2.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata-2.ll
new file mode 100644
index 00000000000..bec0d7d2c36
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata-2.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that align metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !align !0
+ %b = load i32*, i32** %0, !align !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[ALIGN]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8}
\ No newline at end of file
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata-3.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata-3.ll
new file mode 100644
index 00000000000..bad4bb24059
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata-3.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable !0
+ %b = load i32*, i32** %0, !dereferenceable !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8}
\ No newline at end of file
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata-4.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata-4.ll
new file mode 100644
index 00000000000..2a1ffcd0605
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata-4.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable_or_null metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable_or_null !0
+ %b = load i32*, i32** %0, !dereferenceable_or_null !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8}
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll
new file mode 100644
index 00000000000..25e352bdb7b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll
@@ -0,0 +1,44 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that nonnull metadata is propagated from dominating load.
+; CHECK-LABEL: @combine_metadata_dominance1(
+; CHECK-LABEL: bb1:
+; CHECK: load i32*, i32** %p, align 8, !nonnull !0
+; CHECK-NOT: load i32*, i32** %p
+define void @combine_metadata_dominance1(i32** %p) {
+entry:
+ %a = load i32*, i32** %p, !nonnull !0
+ br label %bb1
+
+bb1:
+ %b = load i32*, i32** %p
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+declare i32 @use(i32*, i32) readonly
+
+; Check that nonnull from the dominated load does not get propagated.
+; There are some cases where it would be safe to keep it.
+; CHECK-LABEL: @combine_metadata_dominance2(
+; CHECK-NOT: nonnull
+define void @combine_metadata_dominance2(i32** %p) {
+entry:
+ %a = load i32*, i32** %p
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %b = load i32*, i32** %p, !nonnull !0
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+
+bb2:
+ ret void
+}
+
+
+!0 = !{}
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll
new file mode 100644
index 00000000000..536f1bb75f6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that range and AA metadata is combined
+; CHECK: %[[V:.*]] = load i32, i32* %0
+; CHECK-SAME: !tbaa !{{[0-9]+}}
+; CHECK-SAME: !range ![[RANGE:[0-9]+]]
+; CHECK: store i32 %[[V]], i32* %1
+; CHECK: store i32 %[[V]], i32* %2
+define void @test_load_load_combine_metadata(i32*, i32*, i32*) {
+ %a = load i32, i32* %0, !tbaa !8, !range !0, !alias.scope !5, !noalias !6
+ %b = load i32, i32* %0, !tbaa !8, !range !1
+ store i32 %a, i32* %1
+ store i32 %b, i32* %2
+ ret void
+}
+
+; CHECK: ![[RANGE]] = !{i32 0, i32 5}
+!0 = !{ i32 0, i32 5 }
+!1 = !{ i32 7, i32 9 }
+!2 = !{!2}
+!3 = !{!3, !2}
+!4 = !{!4, !2}
+!5 = !{!3}
+!6 = !{!4}
+!7 = !{ !"tbaa root" }
+!8 = !{ !9, !9, i64 0 }
+!9 = !{ !"scalar type", !7}
diff --git a/llvm/test/Transforms/InstCombine/load-select.ll b/llvm/test/Transforms/InstCombine/load-select.ll
new file mode 100644
index 00000000000..dfc07983eff
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-select.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+
+@a = constant [2 x i32] [i32 3, i32 6] ; <[2 x i32]*> [#uses=2]
+
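+; Loading through a select of two constant-global element addresses should fold
+; away completely; only a select of the loaded constants remains.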
+define i32 @b(i32 %y) nounwind readonly {
+; CHECK-LABEL: @b(
+; CHECK-NOT: load
+; CHECK: ret i32
+entry:
+ %0 = icmp eq i32 %y, 0 ; <i1> [#uses=1]
+ %storemerge = select i1 %0, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 1), i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 0) ; <i32*> [#uses=1]
+ %1 = load i32, i32* %storemerge, align 4 ; <i32> [#uses=1]
+ ret i32 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
new file mode 100644
index 00000000000..5129349b394
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -0,0 +1,302 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+@X = constant i32 42 ; <i32*> [#uses=2]
+@X2 = constant i32 47 ; <i32*> [#uses=1]
+@Y = constant [2 x { i32, float }] [ { i32, float } { i32 12, float 1.000000e+00 }, { i32, float } { i32 37, float 0x3FF3B2FEC0000000 } ] ; <[2 x { i32, float }]*> [#uses=2]
+@Z = constant [2 x { i32, float }] zeroinitializer ; <[2 x { i32, float }]*> [#uses=1]
+
+@GLOBAL = internal constant [4 x i32] zeroinitializer
+
+
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 42
+;
+ %B = load i32, i32* @X ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define float @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret float 0x3FF3B2FEC0000000
+;
+ %A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1 ; <float*> [#uses=1]
+ %B = load float, float* %A ; <float> [#uses=1]
+ ret float %B
+}
+
+define i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 12
+;
+ %A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0 ; <i32*> [#uses=1]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i32 0
+;
+ %A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Z, i64 0, i64 1, i32 0 ; <i32*> [#uses=1]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ ret i32 %B
+}
+
+define i32 @test5(i1 %C) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[Z:%.*]] = select i1 [[C:%.*]], i32 42, i32 47
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = select i1 %C, i32* @X, i32* @X2 ; <i32*> [#uses=1]
+ %Z = load i32, i32* %Y ; <i32> [#uses=1]
+ ret i32 %Z
+}
+
+define i32 @test7(i32 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: store i32 undef, i32* null, align 536870912
+; CHECK-NEXT: ret i32 undef
+;
+ %V = getelementptr i32, i32* null, i32 %X ; <i32*> [#uses=1]
+ %R = load i32, i32* %V ; <i32> [#uses=1]
+ ret i32 %R
+}
+
+define i32 @test7_no_null_opt(i32 %X) #0 {
+; CHECK-LABEL: @test7_no_null_opt(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[V:%.*]] = getelementptr i32, i32* null, i64 [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[V]], align 4
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %V = getelementptr i32, i32* null, i32 %X ; <i32*> [#uses=1]
+ %R = load i32, i32* %V ; <i32> [#uses=1]
+ ret i32 %R
+}
+attributes #0 = { "null-pointer-is-valid"="true" }
+
+define i32 @test8(i32* %P) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 1
+;
+ store i32 1, i32* %P
+ %X = load i32, i32* %P ; <i32> [#uses=1]
+ ret i32 %X
+}
+
+define i32 @test9(i32* %P) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i32 0
+;
+ %X = load i32, i32* %P ; <i32> [#uses=1]
+ %Y = load i32, i32* %P ; <i32> [#uses=1]
+ %Z = sub i32 %X, %Y ; <i32> [#uses=1]
+ ret i32 %Z
+}
+
+define i32 @test10(i1 %C.upgrd.1, i32* %P, i32* %Q) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: br i1 [[C_UPGRD_1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4
+; CHECK-NEXT: br label [[C:%.*]]
+; CHECK: F:
+; CHECK-NEXT: br label [[C]]
+; CHECK: C:
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ br i1 %C.upgrd.1, label %T, label %F
+T: ; preds = %0
+ store i32 1, i32* %Q
+ store i32 0, i32* %P
+ br label %C
+F: ; preds = %0
+ store i32 0, i32* %P
+ br label %C
+C: ; preds = %F, %T
+ %V = load i32, i32* %P ; <i32> [#uses=1]
+ ret i32 %V
+}
+
+define double @test11(double* %p) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[T0:%.*]] = getelementptr double, double* [[P:%.*]], i64 1
+; CHECK-NEXT: store double 2.000000e+00, double* [[T0]], align 8
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %t0 = getelementptr double, double* %p, i32 1
+ store double 2.0, double* %t0
+ %t1 = getelementptr double, double* %p, i32 1
+ %x = load double, double* %t1
+ ret double %x
+}
+
+define i32 @test12(i32* %P) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i32 123
+;
+ %A = alloca i32
+ store i32 123, i32* %A
+ ; Cast the result of the load, not the source
+ %Q = bitcast i32* %A to i32*
+ %V = load i32, i32* %Q
+ ret i32 %V
+}
+
+define <16 x i8> @test13(<2 x i64> %x) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
+ %tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
+ ret <16 x i8> %tmp
+}
+
+; This test must not have the store of %x forwarded to the load -- there is an
+; intervening store of %y. However, the intervening store occurs with a different
+; type and size and to a different pointer value. This is ensuring that none of
+; those confuse the analysis into thinking that the second store does not alias
+; the first.
+
+define i8 @test14(i8 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_I8:%.*]] = bitcast i32* [[A]] to i8*
+; CHECK-NEXT: store i8 [[X:%.*]], i8* [[A_I8]], align 4
+; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[A]], align 4
+; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[A_I8]], align 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = alloca i32
+ %a.i8 = bitcast i32* %a to i8*
+ store i8 %x, i8* %a.i8
+ store i32 %y, i32* %a
+ %r = load i8, i8* %a.i8
+ ret i8 %r
+}
+
+@test15_global = external global i32
+
+; Same test as @test14 essentially, but using a global instead of an alloca.
+
+define i8 @test15(i8 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: store i8 [[X:%.*]], i8* bitcast (i32* @test15_global to i8*), align 4
+; CHECK-NEXT: store i32 [[Y:%.*]], i32* @test15_global, align 4
+; CHECK-NEXT: [[R:%.*]] = load i8, i8* bitcast (i32* @test15_global to i8*), align 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %g.i8 = bitcast i32* @test15_global to i8*
+ store i8 %x, i8* %g.i8
+ store i32 %y, i32* @test15_global
+ %r = load i8, i8* %g.i8
+ ret i8 %r
+}
+
+; Check that we canonicalize loads which are only stored to use integer types
+; when there is a valid integer type.
+
+define void @test16(i8* %x, i8* %a, i8* %b, i8* %c) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C_CAST:%.*]] = bitcast i8* [[C:%.*]] to i32*
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; CHECK-NEXT: [[X11:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A:%.*]] to i32*
+; CHECK-NEXT: store i32 [[X11]], i32* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B:%.*]] to i32*
+; CHECK-NEXT: store i32 [[X11]], i32* [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[X]] to i32*
+; CHECK-NEXT: [[X22:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[B]] to i32*
+; CHECK-NEXT: store i32 [[X22]], i32* [[TMP4]], align 4
+; CHECK-NEXT: store i32 [[X22]], i32* [[C_CAST]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.cast = bitcast i8* %x to float*
+ %a.cast = bitcast i8* %a to float*
+ %b.cast = bitcast i8* %b to float*
+ %c.cast = bitcast i8* %c to i32*
+
+ %x1 = load float, float* %x.cast
+ store float %x1, float* %a.cast
+ store float %x1, float* %b.cast
+
+ %x2 = load float, float* %x.cast
+ store float %x2, float* %b.cast
+ %x2.cast = bitcast float %x2 to i32
+ store i32 %x2.cast, i32* %c.cast
+
+ ret void
+}
+
+; Check that in cases similar to @test16 we don't try to rewrite a load when
+; its only use is a store but it is used as the pointer to that store rather
+; than the value.
+
+define void @test17(i8** %x, i8 %y) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_LOAD:%.*]] = load i8*, i8** [[X:%.*]], align 8
+; CHECK-NEXT: store i8 [[Y:%.*]], i8* [[X_LOAD]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.load = load i8*, i8** %x
+ store i8 %y, i8* %x.load
+
+ ret void
+}
+
+; Check that we don't try to change the type of the load by inserting a bitcast
+; generating invalid IR.
+%swift.error = type opaque
+declare void @useSwiftError(%swift.error** swifterror)
+
+define void @test18(%swift.error** swifterror %err) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SWIFTERROR:%.*]] = alloca swifterror %swift.error*, align 8
+; CHECK-NEXT: store %swift.error* null, %swift.error** [[SWIFTERROR]], align 8
+; CHECK-NEXT: call void @useSwiftError(%swift.error** nonnull swifterror [[SWIFTERROR]])
+; CHECK-NEXT: [[ERR_RES:%.*]] = load %swift.error*, %swift.error** [[SWIFTERROR]], align 8
+; CHECK-NEXT: store %swift.error* [[ERR_RES]], %swift.error** [[ERR:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %swifterror = alloca swifterror %swift.error*, align 8
+ store %swift.error* null, %swift.error** %swifterror, align 8
+ call void @useSwiftError(%swift.error** nonnull swifterror %swifterror)
+ %err.res = load %swift.error*, %swift.error** %swifterror, align 8
+ store %swift.error* %err.res, %swift.error** %err, align 8
+ ret void
+}
+
+; Make sure we preserve the type of the store to a swifterror pointer.
+
+declare void @initi8(i8**)
+define void @test19(%swift.error** swifterror %err) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8*, align 8
+; CHECK-NEXT: call void @initi8(i8** nonnull [[TMP]])
+; CHECK-NEXT: [[SWIFTERROR:%.*]] = bitcast i8** [[TMP]] to %swift.error**
+; CHECK-NEXT: [[ERR_RES:%.*]] = load %swift.error*, %swift.error** [[SWIFTERROR]], align 8
+; CHECK-NEXT: store %swift.error* [[ERR_RES]], %swift.error** [[ERR:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp = alloca i8*, align 8
+ call void @initi8(i8** %tmp)
+ %swifterror = bitcast i8** %tmp to %swift.error**
+ %err.res = load %swift.error*, %swift.error** %swifterror, align 8
+ store %swift.error* %err.res, %swift.error** %err, align 8
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/load3.ll b/llvm/test/Transforms/InstCombine/load3.ll
new file mode 100644
index 00000000000..6db8dd391fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load3.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.0.0"
+
+; Instcombine should be able to do trivial CSE of loads.
+
+define i32 @test1(i32* %p) {
+ %t0 = getelementptr i32, i32* %p, i32 1
+ %y = load i32, i32* %t0
+ %t1 = getelementptr i32, i32* %p, i32 1
+ %x = load i32, i32* %t1
+ %a = sub i32 %y, %x
+ ret i32 %a
+; CHECK-LABEL: @test1(
+; CHECK: ret i32 0
+}
+
+
+; PR7429
+@.str = private constant [4 x i8] c"XYZ\00"
+define float @test2() {
+ %tmp = load float, float* bitcast ([4 x i8]* @.str to float*), align 1
+ ret float %tmp
+
+; CHECK-LABEL: @test2(
+; CHECK: ret float 0x3806965600000000
+}
+
+@rslts32 = global [36 x i32] zeroinitializer, align 4
+
+@expect32 = internal constant [36 x i32][ i32 1, i32 2, i32 0, i32 100, i32 3,
+i32 4, i32 0, i32 -7, i32 4, i32 4, i32 8, i32 8, i32 1, i32 3, i32 8, i32 3,
+i32 4, i32 -2, i32 2, i32 8, i32 83, i32 77, i32 8, i32 17, i32 77, i32 88, i32
+22, i32 33, i32 44, i32 88, i32 77, i32 4, i32 4, i32 7, i32 -7, i32 -8] ,
+align 4
+
+; PR14986
+define void @test3() nounwind {
+; This is a weird way of computing zero.
+ %l = load i32, i32* getelementptr ([36 x i32], [36 x i32]* @expect32, i32 29826161, i32 28), align 4
+ store i32 %l, i32* getelementptr ([36 x i32], [36 x i32]* @rslts32, i32 29826161, i32 28), align 4
+ ret void
+
+; CHECK-LABEL: @test3(
+; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32], [36 x i32]* @rslts32, i32 0, i32 0)
+}
diff --git a/llvm/test/Transforms/InstCombine/load_combine_aa.ll b/llvm/test/Transforms/InstCombine/load_combine_aa.ll
new file mode 100644
index 00000000000..b84b81ddd5d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load_combine_aa.ll
@@ -0,0 +1,15 @@
+; RUN: opt -basicaa -instcombine -S < %s | FileCheck %s
+
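+; The second load of %0 can reuse the first one because the intervening store
+; is to the noalias argument %3.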
+; CHECK-LABEL: @test_load_combine_aa(
+; CHECK: %[[V:.*]] = load i32, i32* %0
+; CHECK: store i32 0, i32* %3
+; CHECK: store i32 %[[V]], i32* %1
+; CHECK: store i32 %[[V]], i32* %2
+define void @test_load_combine_aa(i32*, i32*, i32*, i32* noalias) {
+ %a = load i32, i32* %0
+ store i32 0, i32* %3
+ %b = load i32, i32* %0
+ store i32 %a, i32* %1
+ store i32 %b, i32* %2
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/loadstore-alignment.ll b/llvm/test/Transforms/InstCombine/loadstore-alignment.ll
new file mode 100644
index 00000000000..e821fb27181
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -0,0 +1,90 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:64:64:64-p2:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
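+; The accesses below are written with align 1, but the known 16-byte alignment
+; of the underlying globals and allocas lets instcombine raise them to align 16.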
+@x = external global <2 x i64>, align 16
+@xx = external global [13 x <2 x i64>], align 16
+
+@x.as2 = external addrspace(2) global <2 x i64>, align 16
+
+; CHECK-LABEL: @static_hem(
+; CHECK: , align 16
+define <2 x i64> @static_hem() {
+ %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 7
+ %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @hem(
+; CHECK: , align 16
+define <2 x i64> @hem(i32 %i) {
+ %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 %i
+ %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @hem_2d(
+; CHECK: , align 16
+define <2 x i64> @hem_2d(i32 %i, i32 %j) {
+ %t = getelementptr [13 x <2 x i64>], [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+ %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @foo(
+; CHECK: , align 16
+define <2 x i64> @foo() {
+ %tmp1 = load <2 x i64>, <2 x i64>* @x, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @bar(
+; CHECK: , align 16
+; CHECK: , align 16
+define <2 x i64> @bar() {
+ %t = alloca <2 x i64>
+ call void @kip(<2 x i64>* %t)
+ %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
+ ret <2 x i64> %tmp1
+}
+
+; CHECK-LABEL: @static_hem_store(
+; CHECK: , align 16
+define void @static_hem_store(<2 x i64> %y) {
+ %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 7
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
+}
+
+; CHECK-LABEL: @hem_store(
+; CHECK: , align 16
+define void @hem_store(i32 %i, <2 x i64> %y) {
+ %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 %i
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
+}
+
+; CHECK-LABEL: @hem_2d_store(
+; CHECK: , align 16
+define void @hem_2d_store(i32 %i, i32 %j, <2 x i64> %y) {
+ %t = getelementptr [13 x <2 x i64>], [13 x <2 x i64>]* @xx, i32 %i, i32 %j
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
+}
+
+; CHECK-LABEL: @foo_store(
+; CHECK: , align 16
+define void @foo_store(<2 x i64> %y) {
+ store <2 x i64> %y, <2 x i64>* @x, align 1
+ ret void
+}
+
+; CHECK-LABEL: @bar_store(
+; CHECK: , align 16
+define void @bar_store(<2 x i64> %y) {
+ %t = alloca <2 x i64>
+ call void @kip(<2 x i64>* %t)
+ store <2 x i64> %y, <2 x i64>* %t, align 1
+ ret void
+}
+
+declare void @kip(<2 x i64>* %t)
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
new file mode 100644
index 00000000000..5916a8d3a4c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -0,0 +1,150 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test_load_cast_combine_tbaa(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
+; CHECK-LABEL: @test_load_cast_combine_tbaa(
+; CHECK: load i32, i32* %{{.*}}, !tbaa !0
+entry:
+ %l = load float, float* %ptr, !tbaa !0
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define i32 @test_load_cast_combine_noalias(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
+; CHECK-LABEL: @test_load_cast_combine_noalias(
+; CHECK: load i32, i32* %{{.*}}, !alias.scope !3, !noalias !4
+entry:
+ %l = load float, float* %ptr, !alias.scope !3, !noalias !4
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define float @test_load_cast_combine_range(i32* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
+; would be nice to preserve or update it somehow but this is hard when moving
+; between types.
+; CHECK-LABEL: @test_load_cast_combine_range(
+; CHECK: load float, float* %{{.*}}
+; CHECK-NOT: !range
+; CHECK: ret float
+entry:
+ %l = load i32, i32* %ptr, !range !5
+ %c = bitcast i32 %l to float
+ ret float %c
+}
+
+define i32 @test_load_cast_combine_invariant(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
+; CHECK-LABEL: @test_load_cast_combine_invariant(
+; CHECK: load i32, i32* %{{.*}}, !invariant.load !7
+entry:
+ %l = load float, float* %ptr, !invariant.load !6
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define i32 @test_load_cast_combine_nontemporal(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_nontemporal(
+; CHECK: load i32, i32* %{{.*}}, !nontemporal !8
+entry:
+ %l = load float, float* %ptr, !nontemporal !7
+ %c = bitcast float %l to i32
+ ret i32 %c
+}
+
+define i8* @test_load_cast_combine_align(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves align
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_align(
+; CHECK: load i8*, i8** %{{.*}}, !align !9
+entry:
+ %l = load i32*, i32** %ptr, !align !8
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable !9
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable !8
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref_or_null(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves
+; dereferenceable_or_null metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable_or_null !9
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable_or_null !8
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_loop(
+; CHECK: load i32, i32* %{{.*}}, !llvm.access.group !6
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %src.gep = getelementptr inbounds float, float* %src, i32 %i
+ %dst.gep = getelementptr inbounds i32, i32* %dst, i32 %i
+ %l = load float, float* %src.gep, !llvm.access.group !9
+ %c = bitcast float %l to i32
+ store i32 %c, i32* %dst.gep
+ %i.next = add i32 %i, 1
+ %cmp = icmp slt i32 %i.next, %n
+ br i1 %cmp, label %loop, label %exit, !llvm.loop !1
+
+exit:
+ ret void
+}
+
+define void @test_load_cast_combine_nonnull(float** %ptr) {
+; We can't preserve nonnull metadata when converting a load of a pointer to
+; a load of an integer. Instead, we translate it to range metadata.
+; FIXME: We should also transform range metadata back into nonnull metadata.
+; FIXME: This test is very fragile. If any LABEL lines are added after
+; this point, the test will fail, because this test depends on a metadata tuple,
+; which is always emitted at the end of the file. At some point, we should
+; consider an option to the IR printer to emit MD tuples after the function
+; that first uses them--this will allow us to refer to them like this and not
+; have the tests break. For now, this function must always come last in this
+; file, and no LABEL lines are to be added after this point.
+;
+; CHECK-LABEL: @test_load_cast_combine_nonnull(
+; CHECK: %[[V:.*]] = load i64, i64* %{{.*}}, !range ![[MD:[0-9]+]]
+; CHECK-NOT: !nonnull
+; CHECK: store i64 %[[V]], i64*
+entry:
+ %p = load float*, float** %ptr, !nonnull !6
+ %gep = getelementptr float*, float** %ptr, i32 42
+ store float* %p, float** %gep
+ ret void
+}
+
+; This is the metadata tuple that we reference above:
+; CHECK: ![[MD]] = !{i64 1, i64 0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"scalar type", !2}
+!2 = !{!"root"}
+!3 = distinct !{!3, !4}
+!4 = distinct !{!4, !{!"llvm.loop.parallel_accesses", !9}}
+!5 = !{i32 0, i32 42}
+!6 = !{}
+!7 = !{i32 1}
+!8 = !{i64 8}
+!9 = distinct !{}
diff --git a/llvm/test/Transforms/InstCombine/log-pow-nofastmath.ll b/llvm/test/Transforms/InstCombine/log-pow-nofastmath.ll
new file mode 100644
index 00000000000..faaef97311e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/log-pow-nofastmath.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
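+; Without fast-math flags, log(pow(x, y)) and log(exp2(x)) must not be folded.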
+define double @mylog(double %x, double %y) {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @mylog(
+; CHECK: %pow = call double @llvm.pow.f64(double %x, double %y)
+; CHECK: %call = call double @log(double %pow)
+; CHECK: ret double %call
+; CHECK: }
+
+define double @test3(double %x) {
+ %call2 = call double @exp2(double %x)
+ %call3 = call double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %call2 = call double @exp2(double %x)
+; CHECK: %call3 = call double @log(double %call2)
+; CHECK: ret double %call3
+; CHECK: }
+
+declare double @log(double)
+declare double @exp2(double)
+declare double @llvm.pow.f64(double, double)
diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll
new file mode 100644
index 00000000000..4e4a2b2612c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/log-pow.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @log_pow(double %x, double %y) {
+ %pow = call fast double @llvm.pow.f64(double %x, double %y)
+ %call = call fast double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @log_pow(
+; CHECK-NEXT: %log = call fast double @log(double %x)
+; CHECK-NEXT: %mul = fmul fast double %log, %y
+; CHECK-NEXT: ret double %mul
+
+define double @log_pow_not_fast(double %x, double %y) {
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call fast double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @log_pow_not_fast(
+; CHECK-NEXT: %pow = call double @llvm.pow.f64(double %x, double %y)
+; CHECK-NEXT: %call = call fast double @log(double %pow)
+; CHECK-NEXT: ret double %call
+
+define double @function_pointer(double ()* %fptr, double %p1) {
+ %call1 = call double %fptr()
+ %pow = call double @log(double %call1)
+ ret double %pow
+}
+
+; CHECK-LABEL: @function_pointer
+; CHECK-NEXT: %call1 = call double %fptr()
+; CHECK-NEXT: %pow = call double @log(double %call1)
+; CHECK-NEXT: ret double %pow
+
+define double @log_exp2(double %x) {
+ %call2 = call fast double @exp2(double %x)
+ %call3 = call fast double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @log_exp2
+; CHECK-NEXT: %call2 = call fast double @exp2(double %x)
+; CHECK-NEXT: %logmul = fmul fast double %x, 0x3FE62E42FEFA39EF
+; CHECK-NEXT: ret double %logmul
+
+define double @log_exp2_not_fast(double %x) {
+ %call2 = call double @exp2(double %x)
+ %call3 = call fast double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @log_exp2_not_fast
+; CHECK-NEXT: %call2 = call double @exp2(double %x)
+; CHECK-NEXT: %call3 = call fast double @log(double %call2)
+; CHECK-NEXT: ret double %call3
+
+declare double @log(double) #0
+declare double @exp2(double)
+declare double @llvm.pow.f64(double, double)
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
new file mode 100644
index 00000000000..3f02554e7de
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -0,0 +1,637 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+
+define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %e = icmp slt i32 %a, %b
+ %f = sext i1 %e to i32
+ %g = and i32 %c, %f
+ %h = xor i32 %f, -1
+ %i = and i32 %d, %h
+ %j = or i32 %g, %i
+ ret i32 %j
+}
+
+define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %e = icmp slt i32 %a, %b
+ %f = sext i1 %e to i32
+ %g = and i32 %c, %f
+ %h = xor i32 %f, -1
+ %i = and i32 %d, %h
+ %j = or i32 %i, %g
+ ret i32 %j
+}
+
+define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @goo(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.0.0, %c
+ %not = xor i32 %iftmp.0.0, -1
+ %t2 = and i32 %not, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @poo(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.0.0, %c
+ %iftmp = select i1 %t0, i32 0, i32 -1
+ %t2 = and i32 %iftmp, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+; PR32791 - https://bugs.llvm.org//show_bug.cgi?id=32791
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
+define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @fold_inverted_icmp_preds(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 [[D:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp1 = icmp slt i32 %a, %b
+ %sel1 = select i1 %cmp1, i32 %c, i32 0
+ %cmp2 = icmp sge i32 %a, %b
+ %sel2 = select i1 %cmp2, i32 %d, i32 0
+ %or = or i32 %sel1, %sel2
+ ret i32 %or
+}
+
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
+define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 [[C:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[D:%.*]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp1 = icmp slt i32 %a, %b
+ %sel1 = select i1 %cmp1, i32 0, i32 %c
+ %cmp2 = icmp sge i32 %a, %b
+ %sel2 = select i1 %cmp2, i32 0, i32 %d
+ %or = or i32 %sel1, %sel2
+ ret i32 %or
+}
+
+; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
+
+define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @fold_inverted_fcmp_preds(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp uge float [[A]], [[B]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[D:%.*]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %cmp1 = fcmp olt float %a, %b
+ %sel1 = select i1 %cmp1, i32 %c, i32 0
+ %cmp2 = fcmp uge float %a, %b
+ %sel2 = select i1 %cmp2, i32 %d, i32 0
+ %or = or i32 %sel1, %sel2
+ ret i32 %or
+}
+
+; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
+
+define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
+; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> [[C:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
+; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[D:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %cmp1 = icmp ne <2 x i32> %a, %b
+ %sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> <i32 0, i32 0>
+ %cmp2 = icmp eq <2 x i32> %a, %b
+ %sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> <i32 0, i32 0>
+ %or = or <2 x i32> %sel1, %sel2
+ ret <2 x i32> %or
+}
+
+define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @par(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.1.0, %c
+ %not = xor i32 %iftmp.1.0, -1
+ %t2 = and i32 %not, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+; In the following tests (8 commutation variants), verify that a bitcast doesn't get
+; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
+; other vector code because of canonicalization to i64 elements for vectors.
+
+; The fptosi instructions are included to avoid commutation canonicalization based on
+; operator weight. Using another cast operator ensures that both operands of all logic
+; ops are equally weighted, so every commutation possibility is actually exercised.
+
+define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap0(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap1(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap2(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap3(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap4(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap5(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap6(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap7(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP4]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcast_select_multi_uses(
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP:%.*]] to <4 x i32>
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[BC1]], [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[BC2]], [[B:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %a, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %b, %bc2
+ %or = or <2 x i64> %and2, %and1
+ %add = add <2 x i64> %and2, %bc2
+ %sub = sub <2 x i64> %or, %add
+ ret <2 x i64> %sub
+}
+
+define i1 @bools(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ ret i1 %or
+}
+
+; Form a select if we know we can replace 2 simple logic ops.
+
+define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools_multi_uses1(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ %xor = xor i1 %or, %and1
+ ret i1 %xor
+}
+
+; Don't replace a cheap logic op with a potentially expensive select
+; unless we can also eliminate one of the other original ops.
+
+define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools_multi_uses2(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ %add = add i1 %and1, %and2
+ %and3 = and i1 %or, %add
+ ret i1 %and3
+}
+
+define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_of_bools(
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]]
+; CHECK-NEXT: ret <4 x i1> [[TMP1]]
+;
+ %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
+ %and1 = and <4 x i1> %not, %a
+ %and2 = and <4 x i1> %b, %c
+ %or = or <4 x i1> %and2, %and1
+ ret <4 x i1> %or
+}
+
+define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_of_casted_bools(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP2]], <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: ret i4 [[TMP4]]
+;
+ %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
+ %bc1 = bitcast <4 x i1> %not to i4
+ %bc2 = bitcast <4 x i1> %c to i4
+ %and1 = and i4 %a, %bc1
+ %and2 = and i4 %bc2, %b
+ %or = or i4 %and1, %and2
+ ret i4 %or
+}
+
+; Inverted 'and' constants mean this is a select which is canonicalized to a shuffle.
+
+define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_sel_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
+ %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
+; CHECK-LABEL: @vec_sel_consts_weird(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32> <i32 0, i32 4, i32 2>
+; CHECK-NEXT: ret <3 x i129> [[TMP1]]
+;
+ %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
+ %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
+ %or = or <3 x i129> %and2, %and1
+ ret <3 x i129> %or
+}
+
+; The mask elements must be inverted for this to be a select.
+
+define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_not_sel_consts(
+; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
+ %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @vec_not_sel_consts_undef_elts(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_not_sel_consts_undef_elts(
+; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 undef, i32 0, i32 0>
+; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 undef>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %and1 = and <4 x i32> %a, <i32 -1, i32 undef, i32 0, i32 0>
+ %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 undef>
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+; The inverted constants may be operands of xor instructions.
+
+define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_sel_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 false, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %mask = sext <4 x i1> %c to <4 x i32>
+ %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
+ %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %not_mask_flip1, %a
+ %and2 = and <4 x i32> %mask_flip1, %b
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+; Allow the transform even if the mask values have multiple uses because
+; there's still a net reduction of instructions from removing the and/and/or.
+
+define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_sel_xor_multi_use(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 true, i1 false, i1 false, i1 false>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[C]], <i1 false, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[ADD:%.*]] = sub <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: ret <4 x i32> [[ADD]]
+;
+ %mask = sext <4 x i1> %c to <4 x i32>
+ %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
+ %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %not_mask_flip1, %a
+ %and2 = and <4 x i32> %mask_flip1, %b
+ %or = or <4 x i32> %and1, %and2
+ %add = add <4 x i32> %or, %mask_flip1
+ ret <4 x i32> %add
+}
+
+; The 'ashr' guarantees that we have a bitmask, so this is a select with a truncated condition.
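+; An 'ashr' by the bit width minus one replicates the sign bit, so the mask is
+; either all-ones or all-zeros.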
+
+define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
+; CHECK-LABEL: @allSignBits(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[COND:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %bitmask = ashr i32 %cond, 31
+ %not_bitmask = xor i32 %bitmask, -1
+ %a1 = and i32 %tval, %bitmask
+ %a2 = and i32 %not_bitmask, %fval
+ %sel = or i32 %a1, %a2
+ ret i32 %sel
+}
+
+define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
+; CHECK-LABEL: @allSignBits_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
+; CHECK-NEXT: ret <4 x i8> [[TMP2]]
+;
+ %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
+ %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
+ %a1 = and <4 x i8> %tval, %bitmask
+ %a2 = and <4 x i8> %fval, %not_bitmask
+ %sel = or <4 x i8> %a2, %a1
+ ret <4 x i8> %sel
+}
+
+; Negative test - make sure that bitcasts from FP do not cause a crash.
+
+define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fp_bitcast(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[SIA]], [[BC1]]
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[SIB]], [[BC2]]
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %bc1 = bitcast <2 x double> %a to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %bc2 = bitcast <2 x double> %b to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: @computesignbits_through_shuffles(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DOTV]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %cmp = fcmp ole <4 x float> %x, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %s1 = shufflevector <4 x i32> %sext, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %s2 = shufflevector <4 x i32> %sext, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+ %shuf_or1 = or <4 x i32> %s1, %s2
+ %s3 = shufflevector <4 x i32> %shuf_or1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %s4 = shufflevector <4 x i32> %shuf_or1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+ %shuf_or2 = or <4 x i32> %s3, %s4
+ %not_or2 = xor <4 x i32> %shuf_or2, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %xbc = bitcast <4 x float> %x to <4 x i32>
+ %zbc = bitcast <4 x float> %z to <4 x i32>
+ %and1 = and <4 x i32> %not_or2, %xbc
+ %and2 = and <4 x i32> %shuf_or2, %zbc
+ %sel = or <4 x i32> %and1, %and2
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @computesignbits_through_two_input_shuffle(<4 x i32> %x, <4 x i32> %y, <4 x i1> %cond1, <4 x i1> %cond2) {
+; CHECK-LABEL: @computesignbits_through_two_input_shuffle(
+; CHECK-NEXT: [[SEXT1:%.*]] = sext <4 x i1> [[COND1:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SEXT2:%.*]] = sext <4 x i1> [[COND2:%.*]] to <4 x i32>
+; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i32> [[SEXT1]], <4 x i32> [[SEXT2]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[COND]] to <4 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %sext1 = sext <4 x i1> %cond1 to <4 x i32>
+ %sext2 = sext <4 x i1> %cond2 to <4 x i32>
+ %cond = shufflevector <4 x i32> %sext1, <4 x i32> %sext2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %notcond = xor <4 x i32> %cond, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %notcond, %x
+ %and2 = and <4 x i32> %cond, %y
+ %sel = or <4 x i32> %and1, %and2
+ ret <4 x i32> %sel
+}
+
diff --git a/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll b/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll
new file mode 100644
index 00000000000..44de3fc7531
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll
@@ -0,0 +1,183 @@
+; RUN: opt -instcombine < %s -S | FileCheck %s
+
+; This tests dbg.declare lowering for CallInst users of an alloca. The
+; resulting dbg.value expressions should add a deref to the declare's expression.
+
+; Hand-reduced from this example (-g -Og -fsanitize=address):
+
+; static volatile int sink;
+; struct OneElementVector {
+; int Element;
+; OneElementVector(int Element) : Element(Element) { sink = Element; }
+; bool empty() const { return false; }
+; };
+; using container = OneElementVector;
+; static void escape(container &c) { sink = c.Element; }
+; int main() {
+; container d1 = {42};
+; while (!d1.empty())
+; escape(d1);
+; return 0;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.13.0"
+
+%struct.OneElementVector = type { i32 }
+
+define i1 @escape(%struct.OneElementVector* %d1) {
+ ret i1 false
+}
+
+; CHECK-LABEL: @main
+define i32 @main() !dbg !15 {
+entry:
+ %d1 = alloca %struct.OneElementVector, align 4
+ %0 = bitcast %struct.OneElementVector* %d1 to i8*, !dbg !34
+
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var:%.*]], metadata !DIExpression(DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ call void @llvm.dbg.declare(metadata %struct.OneElementVector* %d1, metadata !19, metadata !DIExpression()), !dbg !35
+ call i1 @escape(%struct.OneElementVector* %d1)
+ br label %while.cond, !dbg !37
+
+while.cond: ; preds = %while.body, %entry
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var]], metadata !DIExpression(DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ %call = call i1 @escape(%struct.OneElementVector* %d1), !dbg !38
+ %lnot = xor i1 %call, true, !dbg !39
+ br i1 %lnot, label %while.body, label %while.end, !dbg !37
+
+while.body: ; preds = %while.cond
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var]], metadata !DIExpression(DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ call i1 @escape(%struct.OneElementVector* %d1)
+ br label %while.cond, !dbg !37, !llvm.loop !42
+
+while.end: ; preds = %while.cond
+ ret i32 0, !dbg !45
+}
+
+; CHECK-LABEL: @main2
+define i32 @main2() {
+entry:
+ %d1 = alloca %struct.OneElementVector, align 4
+ %0 = bitcast %struct.OneElementVector* %d1 to i8*, !dbg !34
+
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var:%.*]], metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ call void @llvm.dbg.declare(metadata %struct.OneElementVector* %d1, metadata !19, metadata !DIExpression(DW_OP_lit0, DW_OP_mul)), !dbg !35
+ call i1 @escape(%struct.OneElementVector* %d1)
+ br label %while.cond, !dbg !37
+
+while.cond: ; preds = %while.body, %entry
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var]], metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ %call = call i1 @escape(%struct.OneElementVector* %d1), !dbg !38
+ %lnot = xor i1 %call, true, !dbg !39
+ br i1 %lnot, label %while.body, label %while.end, !dbg !37
+
+while.body: ; preds = %while.cond
+; CHECK: dbg.value(metadata %struct.OneElementVector* [[var]], metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_deref))
+; CHECK-NEXT: call i1 @escape
+ call i1 @escape(%struct.OneElementVector* %d1)
+ br label %while.cond, !dbg !37, !llvm.loop !42
+
+while.end: ; preds = %while.cond
+ ret i32 0, !dbg !45
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.asan.globals = !{!8}
+!llvm.module.flags = !{!10, !11, !12, !13}
+!llvm.ident = !{!14}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "sink", linkageName: "_ZL4sink", scope: !2, file: !3, line: 1, type: !6, isLocal: true, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 337207) (llvm/trunk 337204)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "test.cc", directory: "/Users/vsk/src/builds/llvm.org-master-RA")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{}
+!9 = !{!"test.cc", i32 1, i32 21}
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"wchar_size", i32 4}
+!13 = !{i32 7, !"PIC Level", i32 2}
+!14 = !{!"clang version 7.0.0 (trunk 337207) (llvm/trunk 337204)"}
+!15 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 18, type: !16, isLocal: false, isDefinition: true, scopeLine: 18, flags: DIFlagPrototyped, isOptimized: true, unit: !2, retainedNodes: !18)
+!16 = !DISubroutineType(types: !17)
+!17 = !{!7}
+!18 = !{!19}
+!19 = !DILocalVariable(name: "d1", scope: !15, file: !3, line: 21, type: !20)
+!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "container", file: !3, line: 12, baseType: !21)
+!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OneElementVector", file: !3, line: 3, size: 32, flags: DIFlagTypePassByValue, elements: !22, identifier: "_ZTS16OneElementVector")
+!22 = !{!23, !24, !28}
+!23 = !DIDerivedType(tag: DW_TAG_member, name: "Element", scope: !21, file: !3, line: 4, baseType: !7, size: 32)
+!24 = !DISubprogram(name: "OneElementVector", scope: !21, file: !3, line: 6, type: !25, isLocal: false, isDefinition: false, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true)
+!25 = !DISubroutineType(types: !26)
+!26 = !{null, !27, !7}
+!27 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!28 = !DISubprogram(name: "empty", linkageName: "_ZNK16OneElementVector5emptyEv", scope: !21, file: !3, line: 8, type: !29, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true)
+!29 = !DISubroutineType(types: !30)
+!30 = !{!31, !32}
+!31 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
+!32 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!33 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !21)
+!34 = !DILocation(line: 21, column: 3, scope: !15)
+!35 = !DILocation(line: 21, column: 13, scope: !15)
+!36 = !DILocation(line: 21, column: 18, scope: !15)
+!37 = !DILocation(line: 22, column: 3, scope: !15)
+!38 = !DILocation(line: 22, column: 14, scope: !15)
+!39 = !DILocation(line: 22, column: 10, scope: !15)
+!40 = !DILocation(line: 23, column: 5, scope: !41)
+!41 = distinct !DILexicalBlock(scope: !15, file: !3, line: 22, column: 23)
+!42 = distinct !{!42, !37, !43}
+!43 = !DILocation(line: 24, column: 3, scope: !15)
+!44 = !DILocation(line: 26, column: 1, scope: !15)
+!45 = !DILocation(line: 25, column: 3, scope: !15)
+!46 = distinct !DISubprogram(name: "OneElementVector", linkageName: "_ZN16OneElementVectorC1Ei", scope: !21, file: !3, line: 6, type: !25, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !2, declaration: !24, retainedNodes: !47)
+!47 = !{!48, !50}
+!48 = !DILocalVariable(name: "this", arg: 1, scope: !46, type: !49, flags: DIFlagArtificial | DIFlagObjectPointer)
+!49 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !21, size: 64)
+!50 = !DILocalVariable(name: "Element", arg: 2, scope: !46, file: !3, line: 6, type: !7)
+!51 = !DILocation(line: 0, scope: !46)
+!52 = !DILocation(line: 6, column: 24, scope: !46)
+!53 = !DILocation(line: 6, column: 52, scope: !46)
+!54 = !DILocation(line: 6, column: 70, scope: !46)
+!55 = distinct !DISubprogram(name: "empty", linkageName: "_ZNK16OneElementVector5emptyEv", scope: !21, file: !3, line: 8, type: !29, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !2, declaration: !28, retainedNodes: !56)
+!56 = !{!57}
+!57 = !DILocalVariable(name: "this", arg: 1, scope: !55, type: !58, flags: DIFlagArtificial | DIFlagObjectPointer)
+!58 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !33, size: 64)
+!59 = !DILocation(line: 0, scope: !55)
+!60 = !DILocation(line: 8, column: 24, scope: !55)
+!61 = distinct !DISubprogram(name: "escape", linkageName: "_ZL6escapeR16OneElementVector", scope: !3, file: !3, line: 14, type: !62, isLocal: true, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !2, retainedNodes: !65)
+!62 = !DISubroutineType(types: !63)
+!63 = !{null, !64}
+!64 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !20, size: 64)
+!65 = !{!66}
+!66 = !DILocalVariable(name: "c", arg: 1, scope: !61, file: !3, line: 14, type: !64)
+!67 = !DILocation(line: 14, column: 31, scope: !61)
+!68 = !DILocation(line: 15, column: 12, scope: !61)
+!69 = !{!70, !71, i64 0}
+!70 = !{!"_ZTS16OneElementVector", !71, i64 0}
+!71 = !{!"int", !72, i64 0}
+!72 = !{!"omnipotent char", !73, i64 0}
+!73 = !{!"Simple C++ TBAA"}
+!74 = !DILocation(line: 15, column: 8, scope: !61)
+!75 = !{!71, !71, i64 0}
+!76 = !DILocation(line: 16, column: 1, scope: !61)
+!77 = distinct !DISubprogram(name: "OneElementVector", linkageName: "_ZN16OneElementVectorC2Ei", scope: !21, file: !3, line: 6, type: !25, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !2, declaration: !24, retainedNodes: !78)
+!78 = !{!79, !80}
+!79 = !DILocalVariable(name: "this", arg: 1, scope: !77, type: !49, flags: DIFlagArtificial | DIFlagObjectPointer)
+!80 = !DILocalVariable(name: "Element", arg: 2, scope: !77, file: !3, line: 6, type: !7)
+!81 = !DILocation(line: 0, scope: !77)
+!82 = !DILocation(line: 6, column: 24, scope: !77)
+!83 = !DILocation(line: 6, column: 35, scope: !77)
+!84 = !DILocation(line: 6, column: 59, scope: !85)
+!85 = distinct !DILexicalBlock(scope: !77, file: !3, line: 6, column: 52)
+!86 = !DILocation(line: 6, column: 70, scope: !77)
diff --git a/llvm/test/Transforms/InstCombine/lshr-phi.ll b/llvm/test/Transforms/InstCombine/lshr-phi.ll
new file mode 100644
index 00000000000..91fd2981e5b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lshr-phi.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Instcombine should be able to eliminate the lshr, because only
+; bits in the operand which might be non-zero will be shifted
+; off the end.
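+; In @hash_string, %k.04 is always masked to 14 bits (16383 = 0x3FFF), so the
+; 'lshr i32 %k.04, 14' result is known to be zero and the add of it folds away.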
+
+define i32 @hash_string(i8* nocapture %key) nounwind readonly {
+; CHECK-LABEL: @hash_string(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[KEY:%.*]], align 1
+; CHECK-NEXT: [[T1:%.*]] = icmp eq i8 [[T0]], 0
+; CHECK-NEXT: br i1 [[T1]], label [[BB2:%.*]], label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T:%.*]], [[BB]] ]
+; CHECK-NEXT: [[K_04:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[T8:%.*]], [[BB]] ]
+; CHECK-NEXT: [[CP_05:%.*]] = getelementptr i8, i8* [[KEY]], i64 [[INDVAR]]
+; CHECK-NEXT: [[T2:%.*]] = shl nuw nsw i32 [[K_04]], 1
+; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[CP_05]], align 1
+; CHECK-NEXT: [[T6:%.*]] = sext i8 [[T5]] to i32
+; CHECK-NEXT: [[T7:%.*]] = xor i32 [[T2]], [[T6]]
+; CHECK-NEXT: [[T8]] = and i32 [[T7]], 16383
+; CHECK-NEXT: [[T]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[KEY]], i64 [[T]]
+; CHECK-NEXT: [[T9:%.*]] = load i8, i8* [[SCEVGEP]], align 1
+; CHECK-NEXT: [[T10:%.*]] = icmp eq i8 [[T9]], 0
+; CHECK-NEXT: br i1 [[T10]], label [[BB2]], label [[BB]]
+; CHECK: bb2:
+; CHECK-NEXT: [[K_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[T8]], [[BB]] ]
+; CHECK-NEXT: ret i32 [[K_0_LCSSA]]
+;
+entry:
+ %t0 = load i8, i8* %key, align 1
+ %t1 = icmp eq i8 %t0, 0
+ br i1 %t1, label %bb2, label %bb
+
+bb:
+ %indvar = phi i64 [ 0, %entry ], [ %t, %bb ]
+ %k.04 = phi i32 [ 0, %entry ], [ %t8, %bb ]
+ %cp.05 = getelementptr i8, i8* %key, i64 %indvar
+ %t2 = shl i32 %k.04, 1
+ %t3 = lshr i32 %k.04, 14
+ %t4 = add i32 %t2, %t3
+ %t5 = load i8, i8* %cp.05, align 1
+ %t6 = sext i8 %t5 to i32
+ %t7 = xor i32 %t6, %t4
+ %t8 = and i32 %t7, 16383
+ %t = add i64 %indvar, 1
+ %scevgep = getelementptr i8, i8* %key, i64 %t
+ %t9 = load i8, i8* %scevgep, align 1
+ %t10 = icmp eq i8 %t9, 0
+ br i1 %t10, label %bb2, label %bb
+
+bb2:
+ %k.0.lcssa = phi i32 [ 0, %entry ], [ %t8, %bb ]
+ ret i32 %k.0.lcssa
+}
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
new file mode 100644
index 00000000000..8ab3ca88741
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n8:16:32:64"
+
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
+declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>) nounwind readnone
+
+define i32 @lshr_ctlz_zero_is_not_undef(i32 %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[SH]]
+;
+ %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %sh = lshr i32 %ct, 5
+ ret i32 %sh
+}
+
+define i32 @lshr_cttz_zero_is_not_undef(i32 %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_not_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[SH]]
+;
+ %ct = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ %sh = lshr i32 %ct, 5
+ ret i32 %sh
+}
+
+define i32 @lshr_ctpop(i32 %x) {
+; CHECK-LABEL: @lshr_ctpop(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, -1
+; CHECK-NEXT: [[SH:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[SH]]
+;
+ %ct = call i32 @llvm.ctpop.i32(i32 %x)
+ %sh = lshr i32 %ct, 5
+ ret i32 %sh
+}
+
+define <2 x i8> @lshr_ctlz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[SH]]
+;
+ %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 false)
+ %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+ ret <2 x i8> %sh
+}
+
+define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_cttz_zero_is_not_undef_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[SH]]
+;
+ %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
+ %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+ ret <2 x i8> %sh
+}
+
+define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_ctpop_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, <i8 -1, i8 -1>
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[SH]]
+;
+ %ct = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
+ %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
+ ret <2 x i8> %sh
+}
+
+define i8 @lshr_exact(i8 %x) {
+; CHECK-LABEL: @lshr_exact(
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 %x, 2
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SHL]], 4
+; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2
+; CHECK-NEXT: ret i8 [[LSHR]]
+;
+ %shl = shl i8 %x, 2
+ %add = add i8 %shl, 4
+ %lshr = lshr i8 %add, 2
+ ret i8 %lshr
+}
+
+define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_exact_splat_vec(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> %x, <i8 2, i8 2>
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[SHL]], <i8 4, i8 4>
+; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], <i8 2, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[LSHR]]
+;
+ %shl = shl <2 x i8> %x, <i8 2, i8 2>
+ %add = add <2 x i8> %shl, <i8 4, i8 4>
+ %lshr = lshr <2 x i8> %add, <i8 2, i8 2>
+ ret <2 x i8> %lshr
+}
+
+define i16 @bool_zext(i1 %x) {
+; CHECK-LABEL: @bool_zext(
+; CHECK-NEXT: [[HIBIT:%.*]] = zext i1 %x to i16
+; CHECK-NEXT: ret i16 [[HIBIT]]
+;
+ %sext = sext i1 %x to i16
+ %hibit = lshr i16 %sext, 15
+ ret i16 %hibit
+}
+
+define <2 x i8> @bool_zext_splat(<2 x i1> %x) {
+; CHECK-LABEL: @bool_zext_splat(
+; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i1> %x to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[HIBIT]]
+;
+ %sext = sext <2 x i1> %x to <2 x i8>
+ %hibit = lshr <2 x i8> %sext, <i8 7, i8 7>
+ ret <2 x i8> %hibit
+}
+
+define i32 @smear_sign_and_widen(i8 %x) {
+; CHECK-LABEL: @smear_sign_and_widen(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 7
+; CHECK-NEXT: [[HIBIT:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[HIBIT]]
+;
+ %sext = sext i8 %x to i32
+ %hibit = lshr i32 %sext, 24
+ ret i32 %hibit
+}
+
+define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) {
+; CHECK-LABEL: @smear_sign_and_widen_should_not_change_type(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i4 %x to i16
+; CHECK-NEXT: [[HIBIT:%.*]] = lshr i16 [[SEXT]], 12
+; CHECK-NEXT: ret i16 [[HIBIT]]
+;
+ %sext = sext i4 %x to i16
+ %hibit = lshr i16 %sext, 12
+ ret i16 %hibit
+}
+
+define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) {
+; CHECK-LABEL: @smear_sign_and_widen_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i6> %x, <i6 2, i6 2>
+; CHECK-NEXT: [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[HIBIT]]
+;
+ %sext = sext <2 x i6> %x to <2 x i8>
+ %hibit = lshr <2 x i8> %sext, <i8 2, i8 2>
+ ret <2 x i8> %hibit
+}
+
+define i18 @fake_sext(i3 %x) {
+; CHECK-LABEL: @fake_sext(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i3 %x, 2
+; CHECK-NEXT: [[SH:%.*]] = zext i3 [[TMP1]] to i18
+; CHECK-NEXT: ret i18 [[SH]]
+;
+ %sext = sext i3 %x to i18
+ %sh = lshr i18 %sext, 17
+ ret i18 %sh
+}
+
+; Avoid the transform if it would change the shift from a legal to an illegal type.
+
+define i32 @fake_sext_but_should_not_change_type(i3 %x) {
+; CHECK-LABEL: @fake_sext_but_should_not_change_type(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i3 %x to i32
+; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[SEXT]], 31
+; CHECK-NEXT: ret i32 [[SH]]
+;
+ %sext = sext i3 %x to i32
+ %sh = lshr i32 %sext, 31
+ ret i32 %sh
+}
+
+define <2 x i8> @fake_sext_splat(<2 x i3> %x) {
+; CHECK-LABEL: @fake_sext_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i3> %x, <i3 2, i3 2>
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i3> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[SH]]
+;
+ %sext = sext <2 x i3> %x to <2 x i8>
+ %sh = lshr <2 x i8> %sext, <i8 7, i8 7>
+ ret <2 x i8> %sh
+}
+
+; Use a narrow shift: lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
+
+define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @narrow_lshr_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i8> %x, <i8 3, i8 3>
+; CHECK-NEXT: [[SH:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[SH]]
+;
+ %zx = zext <2 x i8> %x to <2 x i32>
+ %sh = lshr <2 x i32> %zx, <i32 3, i32 3>
+ ret <2 x i32> %sh
+}
+
diff --git a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
new file mode 100644
index 00000000000..7e7b6d9aee5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -0,0 +1,288 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR1201
+define i32 @main(i32 %argc, i8** %argv) {
+; CHECK-LABEL: @main(
+ %c_19 = alloca i8*
+ %malloc_206 = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i32), i32 10))
+ store i8* %malloc_206, i8** %c_19
+ %tmp_207 = load i8*, i8** %c_19
+ tail call void @free(i8* %tmp_207)
+ ret i32 0
+; CHECK-NEXT: ret i32 0
+}
+
+declare noalias i8* @calloc(i32, i32) nounwind
+declare noalias i8* @malloc(i32)
+declare void @free(i8*)
+
+define i1 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: ret i1 false
+ %m = call i8* @malloc(i32 1)
+ %z = icmp eq i8* %m, null
+ call void @free(i8* %m)
+ ret i1 %z
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+declare void @llvm.lifetime.end.p0i8(i64, i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind
+
+define void @test3(i8* %src) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret void
+ %a = call noalias i8* @malloc(i32 10)
+ call void @llvm.lifetime.start.p0i8(i64 10, i8* %a)
+ call void @llvm.lifetime.end.p0i8(i64 10, i8* %a)
+ %size = call i64 @llvm.objectsize.i64(i8* %a, i1 true)
+ store i8 42, i8* %a
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i1 false)
+ %alloc2 = call noalias i8* @calloc(i32 5, i32 7) nounwind
+ %z = icmp ne i8* %alloc2, null
+ ret void
+}
+
+;; This used to crash.
+define void @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret void
+ %A = call i8* @malloc(i32 16000)
+ %B = bitcast i8* %A to double*
+ %C = bitcast double* %B to i8*
+ call void @free(i8* %C)
+ ret void
+}
+
+; CHECK-LABEL: @test5(
+define void @test5(i8* %ptr, i8** %esc) {
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call i8* @malloc
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: call void @llvm.memmove
+; CHECK-NEXT: store
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: call void @llvm.memmove
+; CHECK-NEXT: call void @llvm.memset
+; CHECK-NEXT: store volatile
+; CHECK-NEXT: ret
+ %a = call i8* @malloc(i32 700)
+ %b = call i8* @malloc(i32 700)
+ %c = call i8* @malloc(i32 700)
+ %d = call i8* @malloc(i32 700)
+ %e = call i8* @malloc(i32 700)
+ %f = call i8* @malloc(i32 700)
+ %g = call i8* @malloc(i32 700)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i1 false)
+ store i8* %c, i8** %esc
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i1 true)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i1 true)
+ call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i1 true)
+ store volatile i8 4, i8* %g
+ ret void
+}
+
+;; When a basic block contains only a call to free, and that block is reached
+;; through a test of free's argument against null, move the call into the
+;; predecessor block.
+;; Running simplifycfg afterwards removes the now-empty basic block and the branch,
+;; and dead code elimination then removes the comparison.
+;; This is what happens at -O1 and higher.
+; CHECK-LABEL: @test6(
+define void @test6(i8* %foo) minsize {
+; CHECK: %tobool = icmp eq i8* %foo, null
+;; Call to free moved
+; CHECK-NEXT: tail call void @free(i8* %foo)
+; CHECK-NEXT: br i1 %tobool, label %if.end, label %if.then
+; CHECK: if.then:
+;; Block is now empty and may be simplified by simplifycfg
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+entry:
+ %tobool = icmp eq i8* %foo, null
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @free(i8* %foo)
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare i8* @_ZnwmRKSt9nothrow_t(i64, i8*) nobuiltin
+declare void @_ZdlPvRKSt9nothrow_t(i8*, i8*) nobuiltin
+declare i32 @__gxx_personality_v0(...)
+declare void @_ZN1AC2Ev(i8* %this)
+
+; CHECK-LABEL: @test7(
+define void @test7() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %nt = alloca i8
+ ; CHECK-NOT: call {{.*}}@_ZnwmRKSt9nothrow_t(
+ %call.i = tail call i8* @_ZnwmRKSt9nothrow_t(i64 1, i8* %nt) builtin nounwind
+ invoke void @_ZN1AC2Ev(i8* undef)
+ to label %.noexc.i unwind label %lpad.i
+
+.noexc.i: ; preds = %entry
+ unreachable
+
+lpad.i: ; preds = %entry
+ %0 = landingpad { i8*, i32 } cleanup
+ ; CHECK-NOT: call {{.*}}@_ZdlPvRKSt9nothrow_t(
+ call void @_ZdlPvRKSt9nothrow_t(i8* %call.i, i8* %nt) builtin nounwind
+ resume { i8*, i32 } %0
+}
+
+declare i8* @_Znwm(i64) nobuiltin
+define i8* @_Znwj(i32 %n) nobuiltin {
+ %z = zext i32 %n to i64
+ %m = call i8* @_Znwm(i64 %z)
+ ret i8* %m
+}
+declare i8* @_Znam(i64) nobuiltin
+declare i8* @_Znaj(i32) nobuiltin
+declare void @_ZdlPv(i8*) nobuiltin
+declare void @_ZdaPv(i8*) nobuiltin
+
+define linkonce void @_ZdlPvm(i8* %p, i64) nobuiltin {
+ call void @_ZdlPv(i8* %p)
+ ret void
+}
+define linkonce void @_ZdlPvj(i8* %p, i32) nobuiltin {
+ call void @_ZdlPv(i8* %p)
+ ret void
+}
+define linkonce void @_ZdaPvm(i8* %p, i64) nobuiltin {
+ call void @_ZdaPv(i8* %p)
+ ret void
+}
+define linkonce void @_ZdaPvj(i8* %p, i32) nobuiltin {
+ call void @_ZdaPv(i8* %p)
+ ret void
+}
+
+
+; new(size_t, align_val_t)
+declare i8* @_ZnwmSt11align_val_t(i64, i64) nobuiltin
+declare i8* @_ZnwjSt11align_val_t(i32, i32) nobuiltin
+; new[](size_t, align_val_t)
+declare i8* @_ZnamSt11align_val_t(i64, i64) nobuiltin
+declare i8* @_ZnajSt11align_val_t(i32, i32) nobuiltin
+; new(size_t, align_val_t, nothrow)
+declare i8* @_ZnwmSt11align_val_tRKSt9nothrow_t(i64, i64, i8*) nobuiltin
+declare i8* @_ZnwjSt11align_val_tRKSt9nothrow_t(i32, i32, i8*) nobuiltin
+; new[](size_t, align_val_t, nothrow)
+declare i8* @_ZnamSt11align_val_tRKSt9nothrow_t(i64, i64, i8*) nobuiltin
+declare i8* @_ZnajSt11align_val_tRKSt9nothrow_t(i32, i32, i8*) nobuiltin
+; delete(void*, align_val_t)
+declare void @_ZdlPvSt11align_val_t(i8*, i64) nobuiltin
+; delete[](void*, align_val_t)
+declare void @_ZdaPvSt11align_val_t(i8*, i64) nobuiltin
+; delete(void*, align_val_t, nothrow)
+declare void @_ZdlPvSt11align_val_tRKSt9nothrow_t(i8*, i64, i8*) nobuiltin
+; delete[](void*, align_val_t, nothrow)
+declare void @_ZdaPvSt11align_val_tRKSt9nothrow_t(i8*, i64, i8*) nobuiltin
+
+
+; CHECK-LABEL: @test8(
+define void @test8() {
+ ; CHECK-NOT: call
+ %nt = alloca i8
+ %nw = call i8* @_Znwm(i64 32) builtin
+ call void @_ZdlPv(i8* %nw) builtin
+ %na = call i8* @_Znam(i64 32) builtin
+ call void @_ZdaPv(i8* %na) builtin
+ %nwm = call i8* @_Znwm(i64 32) builtin
+ call void @_ZdlPvm(i8* %nwm, i64 32) builtin
+ %nwj = call i8* @_Znwj(i32 32) builtin
+ call void @_ZdlPvj(i8* %nwj, i32 32) builtin
+ %nam = call i8* @_Znam(i64 32) builtin
+ call void @_ZdaPvm(i8* %nam, i64 32) builtin
+ %naj = call i8* @_Znaj(i32 32) builtin
+ call void @_ZdaPvj(i8* %naj, i32 32) builtin
+ %nwa = call i8* @_ZnwmSt11align_val_t(i64 32, i64 8) builtin
+ call void @_ZdlPvSt11align_val_t(i8* %nwa, i64 8) builtin
+ %naa = call i8* @_ZnamSt11align_val_t(i64 32, i64 8) builtin
+ call void @_ZdaPvSt11align_val_t(i8* %naa, i64 8) builtin
+ %nwja = call i8* @_ZnwjSt11align_val_t(i32 32, i32 8) builtin
+ call void @_ZdlPvSt11align_val_t(i8* %nwja, i64 8) builtin
+ %naja = call i8* @_ZnajSt11align_val_t(i32 32, i32 8) builtin
+ call void @_ZdaPvSt11align_val_t(i8* %naja, i64 8) builtin
+ %nwat = call i8* @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 32, i64 8, i8* %nt) builtin
+ call void @_ZdlPvSt11align_val_tRKSt9nothrow_t(i8* %nwat, i64 8, i8* %nt) builtin
+ %naat = call i8* @_ZnamSt11align_val_tRKSt9nothrow_t(i64 32, i64 8, i8* %nt) builtin
+ call void @_ZdaPvSt11align_val_tRKSt9nothrow_t(i8* %naat, i64 8, i8* %nt) builtin
+ %nwjat = call i8* @_ZnwjSt11align_val_tRKSt9nothrow_t(i32 32, i32 8, i8* %nt) builtin
+ call void @_ZdlPvSt11align_val_tRKSt9nothrow_t(i8* %nwjat, i64 8, i8* %nt) builtin
+ %najat = call i8* @_ZnajSt11align_val_tRKSt9nothrow_t(i32 32, i32 8, i8* %nt) builtin
+ call void @_ZdaPvSt11align_val_tRKSt9nothrow_t(i8* %najat, i64 8, i8* %nt) builtin
+ ret void
+}
+
+declare noalias i8* @"\01??2@YAPEAX_K@Z"(i64) nobuiltin
+declare void @"\01??3@YAXPEAX@Z"(i8*) nobuiltin
+
+; CHECK-LABEL: @test9(
+define void @test9() {
+ ; CHECK-NOT: call
+ %new_long_long = call noalias i8* @"\01??2@YAPEAX_K@Z"(i64 32) builtin
+ call void @"\01??3@YAXPEAX@Z"(i8* %new_long_long) builtin
+ ret void
+}
+
+define void @test10() {
+; CHECK-LABEL: @test10
+; CHECK: call void @_ZdlPv
+ call void @_ZdlPv(i8* null)
+ ret void
+}
+
+define void @test11() {
+; CHECK-LABEL: @test11
+; CHECK: call i8* @_Znwm
+; CHECK: call void @_ZdlPv
+ %call = call i8* @_Znwm(i64 8) builtin
+ call void @_ZdlPv(i8* %call)
+ ret void
+}
+
+;; Check that the optimization that moves a call to free into its predecessor
+;; block (see test6) also happens when no-op casts are involved.
+; CHECK-LABEL: @test12(
+define void @test12(i32* %foo) minsize {
+; CHECK: %tobool = icmp eq i32* %foo, null
+;; Everything before the call to free should have been moved as well.
+; CHECK-NEXT: %bitcast = bitcast i32* %foo to i8*
+;; Call to free moved
+; CHECK-NEXT: tail call void @free(i8* %bitcast)
+; CHECK-NEXT: br i1 %tobool, label %if.end, label %if.then
+; CHECK: if.then:
+;; Block is now empty and may be simplified by simplifycfg
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+entry:
+ %tobool = icmp eq i32* %foo, null
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %bitcast = bitcast i32* %foo to i8*
+ tail call void @free(i8* %bitcast)
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-add.ll b/llvm/test/Transforms/InstCombine/masked-merge-add.ll
new file mode 100644
index 00000000000..40ee8952fbf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked-merge-add.ll
@@ -0,0 +1,415 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=6773
+
+; Patterns:
+; (x & m) | (y & ~m)
+; (x & m) ^ (y & ~m)
+; (x & m) + (y & ~m)
+; Should be transformed into:
+; (x & m) | (y & ~m)
+; And then into:
+; ((x ^ y) & m) ^ y
+
+; ============================================================================ ;
+; Most basic positive tests
+; ============================================================================ ;
+
+define i32 @p(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) {
+; CHECK-LABEL: @p_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, %m
+ %neg = xor <2 x i32> %m, <i32 -1, i32 -1>
+ %and1 = and <2 x i32> %neg, %y
+ %ret = add <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> %m) {
+; CHECK-LABEL: @p_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, %m
+ %neg = xor <3 x i32> %m, <i32 -1, i32 undef, i32 -1>
+ %and1 = and <3 x i32> %neg, %y
+ %ret = add <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask.
+; ============================================================================ ;
+
+define i32 @p_constmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 65280>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = add <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 16776960>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 16776960>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = add <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 65280, i32 undef, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = add <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 65280, i32 undef, i32 65280>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = add <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask with no common bits set, but common unset bits.
+; ============================================================================ ;
+
+define i32 @p_constmask2(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 61440
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 61440
+ %and1 = and i32 %y, -65281
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 61440>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = add <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask2_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 16711680>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 16711680>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = add <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask2_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 61440, i32 undef, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = add <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 61440, i32 undef, i32 61440>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = add <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Commutativity.
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i32 @gen32()
+
+define i32 @p_commutative0(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative0(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative1(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative1(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative2(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative3(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative3(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative4(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative4(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative5(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative5(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = add i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative6(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative6(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = add i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_constmask_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask_commutative(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = add i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use32(i32)
+
+define i32 @n0_oneuse(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %neg)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+define i32 @n0_constmask_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = add i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+; Bad xor constant
+
+define i32 @n1_badxor(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n1_badxor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, 1 ; not -1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different mask is used
+
+define i32 @n2_badmask(i32 %x, i32 %y, i32 %m1, i32 %m2) {
+; CHECK-LABEL: @n2_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M1:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M2:%.*]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m1, %x
+ %neg = xor i32 %m2, -1 ; different mask, not %m1
+ %and1 = and i32 %neg, %y
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different const mask is used
+
+define i32 @n3_constmask_badmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65280
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65280 ; not -65281, so they have one common bit set
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @n3_constmask_samemask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_samemask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, 65280 ; both masks are the same
+ %ret = add i32 %and, %and1
+ ret i32 %ret
+}
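
The pattern comments at the top of masked-merge-add.ll rest on a small bit-level identity: (x & m) and (y & ~m) never have a set bit in common, so or, xor, and add all produce the same merge, and that merge equals the two-instruction form ((x ^ y) & m) ^ y. The standalone C program below is an illustrative spot-check of that identity on a few sample values; it is not part of the test suite.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative spot-check, not part of the test suite: with a mask m,
       (x & m) and (y & ~m) have no set bit in common, so |, ^ and + all
       produce the same merge, and it equals the folded ((x ^ y) & m) ^ y. */
    int main(void) {
      uint32_t xs[] = {0x12345678u, 0xdeadbeefu, 0u, 0xffffffffu};
      uint32_t ys[] = {0x0f0f0f0fu, 0xcafef00du, 0xffffffffu, 0u};
      uint32_t ms[] = {0x0000ff00u, 0xffff0000u, 0x55555555u, 0u};
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j)
          for (int k = 0; k < 4; ++k) {
            uint32_t x = xs[i], y = ys[j], m = ms[k];
            uint32_t by_or  = (x & m) | (y & ~m);
            uint32_t by_xor = (x & m) ^ (y & ~m);
            uint32_t by_add = (x & m) + (y & ~m);
            uint32_t folded = ((x ^ y) & m) ^ y;
            assert(by_or == by_xor && by_or == by_add && by_or == folded);
          }
      puts("masked-merge identities hold for the sampled values");
      return 0;
    }

This is also why the negative tests keep the add or xor: once the two masks share a set bit or are unrelated, the operands can overlap, carries become possible, and the three operators diverge, so canonicalizing to the or form would be wrong.
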
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll
new file mode 100644
index 00000000000..48346ad6505
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=6773
+
+; Pattern:
+; (x | ~m) & (y | m)
+; Should be transformed into:
+; (x & m) | (y & ~m)
+; And then into:
+; ((x ^ y) & m) ^ y
+
+; ============================================================================ ;
+; Most basic positive tests
+; ============================================================================ ;
+
+define i32 @p(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) {
+; CHECK-LABEL: @p_splatvec(
+; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %neg = xor <2 x i32> %m, <i32 -1, i32 -1>
+ %or = or <2 x i32> %neg, %x
+ %or1 = or <2 x i32> %y, %m
+ %ret = and <2 x i32> %or, %or1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> %m) {
+; CHECK-LABEL: @p_vec_undef(
+; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M:%.*]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or <3 x i32> [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[OR]], [[OR1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %neg = xor <3 x i32> %m, <i32 -1, i32 undef, i32 -1>
+ %or = or <3 x i32> %neg, %x
+ %or1 = or <3 x i32> %y, %m
+ %ret = and <3 x i32> %or, %or1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask.
+; ============================================================================ ;
+
+define i32 @p_constmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65280
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_splatvec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], <i32 65280, i32 65280>
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %or = or <2 x i32> %x, <i32 -65281, i32 -65281>
+ %or1 = or <2 x i32> %y, <i32 65280, i32 65280>
+ %ret = and <2 x i32> %or, %or1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[X:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], <i32 65280, i32 16776960>
+; CHECK-NEXT: [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %or = or <2 x i32> %x, <i32 -65281, i32 -16776961>
+ %or1 = or <2 x i32> %y, <i32 65280, i32 16776960>
+ %ret = and <2 x i32> %or, %or1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec_undef(
+; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[X:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[OR1:%.*]] = or <3 x i32> [[Y:%.*]], <i32 65280, i32 undef, i32 65280>
+; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[OR]], [[OR1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %or = or <3 x i32> %x, <i32 -65281, i32 undef, i32 -65281>
+ %or1 = or <3 x i32> %y, <i32 65280, i32 undef, i32 65280>
+ %ret = and <3 x i32> %or, %or1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Commutativity.
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i32 @gen32()
+
+define i32 @p_commutative0(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative0(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %x, %neg ; swapped order
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define i32 @p_commutative1(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative1(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %m, %y; swapped order
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define i32 @p_commutative2(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative2(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or1, %or ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative3(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative3(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %neg = xor i32 %m, -1
+ %or = or i32 %x, %neg ; swapped order
+ %or1 = or i32 %m, %y; swapped order
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define i32 @p_commutative4(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative4(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %x, %neg ; swapped order
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or1, %or ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative5(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative5(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %m, %y; swapped order
+ %ret = and i32 %or1, %or ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative6(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative6(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %neg = xor i32 %m, -1
+ %or = or i32 %x, %neg ; swapped order
+ %or1 = or i32 %m, %y; swapped order
+ %ret = and i32 %or1, %or ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_constmask_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask_commutative(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65280
+ %ret = and i32 %or1, %or ; swapped order
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use32(i32)
+
+define i32 @n0_oneuse_of_neg_is_ok_0(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_of_neg_is_ok_0(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %neg)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_1(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_1(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_2(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_2(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_3(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_3(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %neg)
+ call void @use32(i32 %or)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_4(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_4(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %neg)
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_5(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_5(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %neg)
+ call void @use32(i32 %or)
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+define i32 @n0_oneuse_6(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse_6(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or)
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+; One-use with constant mask
+
+define i32 @n0_constmask_oneuse_0(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse_0(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65280
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or)
+ ret i32 %ret
+}
+
+define i32 @n0_constmask_oneuse_1(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse_1(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65280
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+define i32 @n0_constmask_oneuse_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse_2(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65280
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: call void @use32(i32 [[OR]])
+; CHECK-NEXT: call void @use32(i32 [[OR1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65280
+ %ret = and i32 %or, %or1
+ call void @use32(i32 %or)
+ call void @use32(i32 %or1)
+ ret i32 %ret
+}
+
+; Bad xor constant
+
+define i32 @n1_badxor(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n1_badxor(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M:%.*]], 1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[M]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m, 1 ; not -1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %y, %m
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+; Different mask is used
+
+define i32 @n2_badmask(i32 %x, i32 %y, i32 %m1, i32 %m2) {
+; CHECK-LABEL: @n2_badmask(
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M2:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[NEG]], [[X:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[M1:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %neg = xor i32 %m2, -1 ; different mask, not %m1
+ %or = or i32 %neg, %x
+ %or1 = or i32 %m1, %y
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+; Different const mask is used
+
+define i32 @n3_constmask_badmask_set(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_badmask_set(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65281
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65281 ; not 65280, so they have one common bit
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define i32 @n3_constmask_badmask_unset(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_badmask_unset(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -65281
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], 65024
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[OR]], [[OR1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, 65024 ; not 65280, so they have one common unset bit
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
+
+define i32 @n3_constmask_samemask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_samemask(
+; CHECK-NEXT: [[OR2:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[OR2]], -65281
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %or = or i32 %x, -65281
+ %or1 = or i32 %y, -65281 ; both masks are the same
+ %ret = and i32 %or, %or1
+ ret i32 %ret
+}
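
masked-merge-and-of-ors.ll above exercises the dual, or-based formulation. As a quick sanity argument (not taken from the test file): where a bit of m is 1, (x | ~m) contributes x's bit and (y | m) contributes 1, so the AND keeps x's bit; where the bit is 0 the roles swap and y's bit survives, which is exactly the (x & m) | (y & ~m) merge. A minimal C helper, hypothetical and for illustration only:

    #include <stdint.h>

    /* Selects x's bits where m is set and y's bits where it is clear. */
    uint32_t merge_and_of_ors(uint32_t x, uint32_t y, uint32_t m) {
      return (x | ~m) & (y | m);
    }
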
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-or.ll b/llvm/test/Transforms/InstCombine/masked-merge-or.ll
new file mode 100644
index 00000000000..377d4325204
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked-merge-or.ll
@@ -0,0 +1,414 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=6773
+
+; Patterns:
+; (x & m) | (y & ~m)
+; (x & m) ^ (y & ~m)
+; (x & m) + (y & ~m)
+; Should be transformed into:
+; (x & m) | (y & ~m)
+; And then into:
+; ((x ^ y) & m) ^ y
+
+; ============================================================================ ;
+; Most basic positive tests
+; ============================================================================ ;
+
+define i32 @p(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) {
+; CHECK-LABEL: @p_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, %m
+ %neg = xor <2 x i32> %m, <i32 -1, i32 -1>
+ %and1 = and <2 x i32> %neg, %y
+ %ret = or <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> %m) {
+; CHECK-LABEL: @p_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, %m
+ %neg = xor <3 x i32> %m, <i32 -1, i32 undef, i32 -1>
+ %and1 = and <3 x i32> %neg, %y
+ %ret = or <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask.
+; ============================================================================ ;
+
+define i32 @p_constmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 65280>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = or <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 16776960>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 16776960>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = or <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 65280, i32 undef, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 65280, i32 undef, i32 65280>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = or <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask with no common bits set, but common unset bits.
+; ============================================================================ ;
+
+define i32 @p_constmask2(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 61440
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 61440
+ %and1 = and i32 %y, -65281
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 61440>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = or <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask2_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 16711680>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 16711680>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = or <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask2_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 61440, i32 undef, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 61440, i32 undef, i32 61440>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = or <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Commutativity.
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i32 @gen32()
+
+define i32 @p_commutative0(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative0(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative1(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative1(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative2(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative3(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative3(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative4(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative4(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative5(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative5(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = or i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative6(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative6(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = or i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_constmask_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask_commutative(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = or i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use32(i32)
+
+define i32 @n0_oneuse(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %neg)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+define i32 @n0_constmask_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = or i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+; Bad xor constant
+
+define i32 @n1_badxor(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n1_badxor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, 1 ; not -1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different mask is used
+
+define i32 @n2_badmask(i32 %x, i32 %y, i32 %m1, i32 %m2) {
+; CHECK-LABEL: @n2_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M1:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M2:%.*]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m1, %x
+ %neg = xor i32 %m2, -1 ; different mask, not %m1
+ %and1 = and i32 %neg, %y
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different const mask is used
+
+define i32 @n3_constmask_badmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65280
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65280 ; not -65281, so they have one common bit
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @n3_constmask_samemask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_samemask(
+; CHECK-NEXT: [[AND2:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[AND2]], 65280
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, 65280 ; both masks are the same
+ %ret = or i32 %and, %and1
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll
new file mode 100644
index 00000000000..6a3e9178ac9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll
@@ -0,0 +1,414 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=6773
+
+; Patterns:
+; (x & m) | (y & ~m)
+; (x & m) ^ (y & ~m)
+; (x & m) + (y & ~m)
+; Should be transformed into:
+; (x & m) | (y & ~m)
+; And then into:
+; ((x ^ y) & m) ^ y
+
+; ============================================================================ ;
+; Most basic positive tests
+; ============================================================================ ;
+
+define i32 @p(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) {
+; CHECK-LABEL: @p_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, %m
+ %neg = xor <2 x i32> %m, <i32 -1, i32 -1>
+ %and1 = and <2 x i32> %neg, %y
+ %ret = xor <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> %m) {
+; CHECK-LABEL: @p_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, %m
+ %neg = xor <3 x i32> %m, <i32 -1, i32 undef, i32 -1>
+ %and1 = and <3 x i32> %neg, %y
+ %ret = xor <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask.
+; ============================================================================ ;
+
+define i32 @p_constmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET1:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET1]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET1:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET1]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 65280>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = xor <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 16776960>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 65280, i32 16776960>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = xor <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 65280, i32 undef, i32 65280>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 65280, i32 undef, i32 65280>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = xor <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant mask with no common bits set, but common unset bits.
+; ============================================================================ ;
+
+define i32 @p_constmask2(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 61440
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET1:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET1]]
+;
+ %and = and i32 %x, 61440
+ %and1 = and i32 %y, -65281
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_splatvec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT: [[RET1:%.*]] = or <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET1]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 61440>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -65281>
+ %ret = xor <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @p_constmask2_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 16711680>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -16776961>
+; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %and = and <2 x i32> %x, <i32 61440, i32 16711680>
+ %and1 = and <2 x i32> %y, <i32 -65281, i32 -16776961>
+ %ret = xor <2 x i32> %and, %and1
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @p_constmask2_vec_undef(<3 x i32> %x, <3 x i32> %y) {
+; CHECK-LABEL: @p_constmask2_vec_undef(
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], <i32 61440, i32 undef, i32 61440>
+; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[Y:%.*]], <i32 -65281, i32 undef, i32 -65281>
+; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[AND]], [[AND1]]
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %and = and <3 x i32> %x, <i32 61440, i32 undef, i32 61440>
+ %and1 = and <3 x i32> %y, <i32 -65281, i32 undef, i32 -65281>
+ %ret = xor <3 x i32> %and, %and1
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Commutativity.
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i32 @gen32()
+
+define i32 @p_commutative0(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative0(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative1(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative1(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative2(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative3(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative3(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @p_commutative4(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @p_commutative4(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative5(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative5(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = xor i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_commutative6(i32 %x, i32 %m) {
+; CHECK-LABEL: @p_commutative6(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y]], [[NEG]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %y = call i32 @gen32()
+ %and = and i32 %m, %x ; swapped order
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %y, %neg; swapped order
+ %ret = xor i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+define i32 @p_constmask_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @p_constmask_commutative(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET1:%.*]] = or i32 [[AND1]], [[AND]]
+; CHECK-NEXT: ret i32 [[RET1]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = xor i32 %and1, %and ; swapped order
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use32(i32)
+
+define i32 @n0_oneuse(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n0_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[NEG]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %neg)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+define i32 @n0_constmask_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @n0_constmask_oneuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65281
+; CHECK-NEXT: [[RET1:%.*]] = or i32 [[AND]], [[AND1]]
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: call void @use32(i32 [[AND1]])
+; CHECK-NEXT: ret i32 [[RET1]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65281
+ %ret = xor i32 %and, %and1
+ call void @use32(i32 %and)
+ call void @use32(i32 %and1)
+ ret i32 %ret
+}
+
+; Bad xor constant
+
+define i32 @n1_badxor(i32 %x, i32 %y, i32 %m) {
+; CHECK-LABEL: @n1_badxor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M]], 1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, %m
+ %neg = xor i32 %m, 1 ; not -1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different mask is used
+
+define i32 @n2_badmask(i32 %x, i32 %y, i32 %m1, i32 %m2) {
+; CHECK-LABEL: @n2_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[M1:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[M2:%.*]], -1
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %m1, %x
+ %neg = xor i32 %m2, -1 ; different mask, not %m1
+ %and1 = and i32 %neg, %y
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+; Different const mask is used
+
+define i32 @n3_constmask_badmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_badmask(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 65280
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], -65280
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[AND]], [[AND1]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, -65280 ; not -65281, so they have one common bit
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
+
+define i32 @n3_constmask_samemask(i32 %x, i32 %y) {
+; CHECK-LABEL: @n3_constmask_samemask(
+; CHECK-NEXT: [[AND2:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[AND2]], 65280
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %and = and i32 %x, 65280
+ %and1 = and i32 %y, 65280 ; both masks are the same
+ %ret = xor i32 %and, %and1
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
new file mode 100644
index 00000000000..582fd8f5c82
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
+declare <4 x double> @llvm.masked.gather.v4f64.p0v4f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
+declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+
+define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_zeromask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_onemask(
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
+; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_undefmask(
+; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
+; CHECK-NEXT: ret <2 x double> [[UNMASKEDLOAD]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+@G = external global i8
+
+define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_cemask(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
+; CHECK-LABEL: @load_lane0(
+; CHECK-NEXT: [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
+ %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
+ ret <2 x double> %res
+}
+
+define double @load_all(double* %base, double %pt) {
+; CHECK-LABEL: @load_all(
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
+; CHECK-NEXT: ret double [[ELT]]
+;
+ %ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+ %res = call <4 x double> @llvm.masked.gather.v4f64.p0v4f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
+ %elt = extractelement <4 x double> %res, i64 2
+ ret double %elt
+}
+
+define <2 x double> @load_generic(<2 x double>* %ptr, double %pt,
+; CHECK-LABEL: @load_generic(
+; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ <2 x i1> %mask) {
+ %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
+ %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) %ptr,
+; CHECK-LABEL: @load_speculative(
+; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ double %pt, <2 x i1> %mask) {
+ %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
+ %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
+ ret <2 x double> %res
+}
+
+; Can't speculate since only half of required size is known deref
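+; (A <2 x double> load needs 16 dereferenceable bytes, but only 8 are guaranteed
+; here, so the masked load must be kept rather than speculated to a plain load.)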
+define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr,
+; CHECK-LABEL: @load_spec_neg_size(
+; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ double %pt, <2 x i1> %mask) {
+ %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
+ %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
+ ret <2 x double> %res
+}
+
+; Can only speculate one lane (but it's the only one active)
+define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr,
+; CHECK-LABEL: @load_spec_lan0(
+; CHECK-NEXT: [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ double %pt, <2 x i1> %mask) {
+ %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
+ %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
+ %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
+ ret <2 x double> %res
+}
+
+define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_zeromask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
+ ret void
+}
+
+define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_onemask(
+; CHECK-NEXT: store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
+ ret void
+}
+
+define void @store_demandedelts(<2 x double>* %ptr, double %val) {
+; CHECK-LABEL: @store_demandedelts(
+; CHECK-NEXT: [[VALVEC2:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC2]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+;
+ %valvec1 = insertelement <2 x double> undef, double %val, i32 0
+ %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
+ ret void
+}
+
+define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask,
+; CHECK-LABEL: @gather_generic(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ <2 x double> %passthru) {
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+
+define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_zeromask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+
+define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_onemask(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
+define <2 x double> @gather_lane0(double* %base, double %pt) {
+; CHECK-LABEL: @gather_lane0(
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 undef>
+; CHECK-NEXT: [[PT_V2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> <i1 true, i1 false>, <2 x double> [[PT_V2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
+ %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
+ %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 false>, <2 x double> %pt_v2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @gather_lane0_maybe(double* %base, double %pt,
+; CHECK-LABEL: @gather_lane0_maybe(
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ <2 x i1> %mask) {
+ %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
+ %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
+ %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
+ %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt,
+; CHECK-LABEL: @gather_lane0_maybe_spec(
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
+; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ <2 x i1> %mask) {
+ %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
+ %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
+ %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
+ %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
+ ret <2 x double> %res
+}
+
+
+define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
+; CHECK-LABEL: @scatter_zeromask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
+ ret void
+}
+
+define void @scatter_demandedelts(double* %ptr, double %val) {
+; CHECK-LABEL: @scatter_demandedelts(
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef>
+; CHECK-NEXT: [[VALVEC2:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
+; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC2]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+;
+ %ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
+ %valvec1 = insertelement <2 x double> undef, double %val, i32 0
+ %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
+ call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll
new file mode 100644
index 00000000000..b8643f5002e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll
@@ -0,0 +1,360 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @umin_of_nots(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]]
+; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[MIN]]
+;
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %noty = xor <2 x i32> %y, <i32 -1, i32 -1>
+ %cmp = icmp ult <2 x i32> %notx, %noty
+ %min = select <2 x i1> %cmp, <2 x i32> %notx, <2 x i32> %noty
+ ret <2 x i32> %min
+}
+
+define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @smin_of_nots(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]]
+; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[MIN]]
+;
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %noty = xor <2 x i32> %y, <i32 -1, i32 -1>
+ %cmp = icmp sle <2 x i32> %notx, %noty
+ %min = select <2 x i1> %cmp, <2 x i32> %notx, <2 x i32> %noty
+ ret <2 x i32> %min
+}
+
+define i32 @compute_min_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @compute_min_2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %not_x = sub i32 -1, %x
+ %not_y = sub i32 -1, %y
+ %cmp = icmp sgt i32 %not_x, %not_y
+ %not_min = select i1 %cmp, i32 %not_x, i32 %not_y
+ %min = sub i32 -1, %not_min
+ ret i32 %min
+}
+
+declare void @extra_use(i8)
+define i8 @umin_not_1_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_not_1_extra_use(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[Y]], i8 [[X]]
+; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1
+; CHECK-NEXT: call void @extra_use(i8 [[NX]])
+; CHECK-NEXT: ret i8 [[MINXY]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %cmpxy = icmp ult i8 %nx, %ny
+ %minxy = select i1 %cmpxy, i8 %nx, i8 %ny
+ call void @extra_use(i8 %nx)
+ ret i8 %minxy
+}
+
+define i8 @umin_not_2_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_not_2_extra_use(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[CMPXY:%.*]] = icmp ult i8 [[NX]], [[NY]]
+; CHECK-NEXT: [[MINXY:%.*]] = select i1 [[CMPXY]], i8 [[NX]], i8 [[NY]]
+; CHECK-NEXT: call void @extra_use(i8 [[NX]])
+; CHECK-NEXT: call void @extra_use(i8 [[NY]])
+; CHECK-NEXT: ret i8 [[MINXY]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %cmpxy = icmp ult i8 %nx, %ny
+ %minxy = select i1 %cmpxy, i8 %nx, i8 %ny
+ call void @extra_use(i8 %nx)
+ call void @extra_use(i8 %ny)
+ ret i8 %minxy
+}
+
+; PR35834 - https://bugs.llvm.org/show_bug.cgi?id=35834
+
+define i8 @umin3_not(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @umin3_not(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Z:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[R_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[R_V]], -1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %nz = xor i8 %z, -1
+ %cmpyx = icmp ult i8 %y, %x
+ %cmpxz = icmp ult i8 %nx, %nz
+ %minxz = select i1 %cmpxz, i8 %nx, i8 %nz
+ %cmpyz = icmp ult i8 %ny, %nz
+ %minyz = select i1 %cmpyz, i8 %ny, i8 %nz
+ %r = select i1 %cmpyx, i8 %minxz, i8 %minyz
+ ret i8 %r
+}
+
+; PR35875 - https://bugs.llvm.org/show_bug.cgi?id=35875
+
+define i8 @umin3_not_more_uses(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @umin3_not_more_uses(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[Z]], i8 [[X]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP4]], -1
+; CHECK-NEXT: call void @extra_use(i8 [[NX]])
+; CHECK-NEXT: call void @extra_use(i8 [[NY]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %nz = xor i8 %z, -1
+ %cmpxz = icmp ult i8 %nx, %nz
+ %minxz = select i1 %cmpxz, i8 %nx, i8 %nz
+ %cmpyz = icmp ult i8 %ny, %nz
+ %minyz = select i1 %cmpyz, i8 %ny, i8 %nz
+ %cmpyx = icmp ult i8 %y, %x
+ %r = select i1 %cmpyx, i8 %minxz, i8 %minyz
+ call void @extra_use(i8 %nx)
+ call void @extra_use(i8 %ny)
+ ret i8 %r
+}
+
+declare void @use8(i8)
+
+define i8 @umin3_not_all_ops_extra_uses(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @umin3_not_all_ops_extra_uses(
+; CHECK-NEXT: [[XN:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[YN:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[ZN:%.*]] = xor i8 [[Z:%.*]], -1
+; CHECK-NEXT: [[CMPXZ:%.*]] = icmp ult i8 [[XN]], [[ZN]]
+; CHECK-NEXT: [[MINXZ:%.*]] = select i1 [[CMPXZ]], i8 [[XN]], i8 [[ZN]]
+; CHECK-NEXT: [[CMPXYZ:%.*]] = icmp ult i8 [[MINXZ]], [[YN]]
+; CHECK-NEXT: [[MINXYZ:%.*]] = select i1 [[CMPXYZ]], i8 [[MINXZ]], i8 [[YN]]
+; CHECK-NEXT: call void @use8(i8 [[XN]])
+; CHECK-NEXT: call void @use8(i8 [[YN]])
+; CHECK-NEXT: call void @use8(i8 [[ZN]])
+; CHECK-NEXT: ret i8 [[MINXYZ]]
+;
+ %xn = xor i8 %x, -1
+ %yn = xor i8 %y, -1
+ %zn = xor i8 %z, -1
+ %cmpxz = icmp ult i8 %xn, %zn
+ %minxz = select i1 %cmpxz, i8 %xn, i8 %zn
+ %cmpxyz = icmp ult i8 %minxz, %yn
+ %minxyz = select i1 %cmpxyz, i8 %minxz, i8 %yn
+ call void @use8(i8 %xn)
+ call void @use8(i8 %yn)
+ call void @use8(i8 %zn)
+ ret i8 %minxyz
+}
+
+define i32 @compute_min_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @compute_min_3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[Z]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %not_x = sub i32 -1, %x
+ %not_y = sub i32 -1, %y
+ %not_z = sub i32 -1, %z
+ %cmp_1 = icmp sgt i32 %not_x, %not_y
+ %not_min_1 = select i1 %cmp_1, i32 %not_x, i32 %not_y
+ %cmp_2 = icmp sgt i32 %not_min_1, %not_z
+ %not_min_2 = select i1 %cmp_2, i32 %not_min_1, i32 %not_z
+ %min = sub i32 -1, %not_min_2
+ ret i32 %min
+}
+
+; Don't increase the critical path by moving the 'not' op after the 'select'.
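+; %not_min is returned directly, so there is no trailing 'sub -1' to absorb a new
+; 'not'; rewriting the select over the original values would have to append an xor
+; after it, so only 'sub -1, %y' is canonicalized and the select stays put.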
+
+define i32 @compute_min_arithmetic(i32 %x, i32 %y) {
+; CHECK-LABEL: @compute_min_arithmetic(
+; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]]
+; CHECK-NEXT: [[NOT_Y:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[NOT_VALUE]], [[NOT_Y]]
+; CHECK-NEXT: [[NOT_MIN:%.*]] = select i1 [[CMP]], i32 [[NOT_VALUE]], i32 [[NOT_Y]]
+; CHECK-NEXT: ret i32 [[NOT_MIN]]
+;
+ %not_value = sub i32 3, %x
+ %not_y = sub i32 -1, %y
+ %cmp = icmp sgt i32 %not_value, %not_y
+ %not_min = select i1 %cmp, i32 %not_value, i32 %not_y
+ ret i32 %not_min
+}
+
+declare void @fake_use(i32)
+
+define i32 @compute_min_pessimization(i32 %x, i32 %y) {
+; CHECK-LABEL: @compute_min_pessimization(
+; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]]
+; CHECK-NEXT: call void @fake_use(i32 [[NOT_VALUE]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %not_value = sub i32 3, %x
+ call void @fake_use(i32 %not_value)
+ %not_y = sub i32 -1, %y
+ %cmp = icmp sgt i32 %not_value, %not_y
+ %not_min = select i1 %cmp, i32 %not_value, i32 %not_y
+ %min = sub i32 -1, %not_min
+ ret i32 %min
+}
+
+define i32 @max_of_nots(i32 %x, i32 %y) {
+; CHECK-LABEL: @max_of_nots(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[X]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP4]], -1
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %c0 = icmp sgt i32 %y, 0
+ %xor_y = xor i32 %y, -1
+ %s0 = select i1 %c0, i32 %xor_y, i32 -1
+ %xor_x = xor i32 %x, -1
+ %c1 = icmp slt i32 %s0, %xor_x
+ %smax96 = select i1 %c1, i32 %xor_x, i32 %s0
+ ret i32 %smax96
+}
+
+; negative test case (i.e. cannot simplify): ABS(MIN(NOT x, y))
+define i32 @abs_of_min_of_not(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_of_min_of_not(
+; CHECK-NEXT: [[XORD:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[YADD:%.*]] = add i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[COND_I:%.*]] = icmp slt i32 [[YADD]], [[XORD]]
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[COND_I]], i32 [[YADD]], i32 [[XORD]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[MIN]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[MIN]]
+; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP2]], i32 [[SUB]], i32 [[MIN]]
+; CHECK-NEXT: ret i32 [[ABS]]
+;
+
+ %xord = xor i32 %x, -1
+ %yadd = add i32 %y, 2
+ %cond.i = icmp sge i32 %yadd, %xord
+ %min = select i1 %cond.i, i32 %xord, i32 %yadd
+ %cmp2 = icmp sgt i32 %min, -1
+ %sub = sub i32 0, %min
+ %abs = select i1 %cmp2, i32 %min, i32 %sub
+ ret i32 %abs
+}
+
+define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @max_of_nots_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[Y:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[Y]], <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], [[X:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP2]], <2 x i32> [[X]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %c0 = icmp sgt <2 x i32> %y, zeroinitializer
+ %xor_y = xor <2 x i32> %y, <i32 -1, i32 -1>
+ %s0 = select <2 x i1> %c0, <2 x i32> %xor_y, <2 x i32> <i32 -1, i32 -1>
+ %xor_x = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %c1 = icmp slt <2 x i32> %s0, %xor_x
+ %smax96 = select <2 x i1> %c1, <2 x i32> %xor_x, <2 x i32> %s0
+ ret <2 x i32> %smax96
+}
+
+define <2 x i37> @max_of_nots_weird_type_vec(<2 x i37> %x, <2 x i37> %y) {
+; CHECK-LABEL: @max_of_nots_weird_type_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i37> [[Y:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i37> [[Y]], <2 x i37> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i37> [[TMP2]], [[X:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x i37> [[TMP2]], <2 x i37> [[X]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i37> [[TMP4]], <i37 -1, i37 -1>
+; CHECK-NEXT: ret <2 x i37> [[TMP5]]
+;
+ %c0 = icmp sgt <2 x i37> %y, zeroinitializer
+ %xor_y = xor <2 x i37> %y, <i37 -1, i37 -1>
+ %s0 = select <2 x i1> %c0, <2 x i37> %xor_y, <2 x i37> <i37 -1, i37 -1>
+ %xor_x = xor <2 x i37> %x, <i37 -1, i37 -1>
+ %c1 = icmp slt <2 x i37> %s0, %xor_x
+ %smax96 = select <2 x i1> %c1, <2 x i37> %xor_x, <2 x i37> %s0
+ ret <2 x i37> %smax96
+}
+
+; max(min(%a, -1), -1) == -1
+define i32 @max_of_min(i32 %a) {
+; CHECK-LABEL: @max_of_min(
+; CHECK-NEXT: ret i32 -1
+;
+ %not_a = xor i32 %a, -1
+ %c0 = icmp sgt i32 %a, 0
+ %s0 = select i1 %c0, i32 %not_a, i32 -1
+ %c1 = icmp sgt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
+
+; max(min(%a, -1), -1) == -1 (swap predicate and select ops)
+define i32 @max_of_min_swap(i32 %a) {
+; CHECK-LABEL: @max_of_min_swap(
+; CHECK-NEXT: ret i32 -1
+;
+ %not_a = xor i32 %a, -1
+ %c0 = icmp slt i32 %a, 0
+ %s0 = select i1 %c0, i32 -1, i32 %not_a
+ %c1 = icmp sgt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
+
+; min(max(%a, -1), -1) == -1
+define i32 @min_of_max(i32 %a) {
+; CHECK-LABEL: @min_of_max(
+; CHECK-NEXT: ret i32 -1
+;
+ %not_a = xor i32 %a, -1
+ %c0 = icmp slt i32 %a, 0
+ %s0 = select i1 %c0, i32 %not_a, i32 -1
+ %c1 = icmp slt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
+
+; min(max(%a, -1), -1) == -1 (swap predicate and select ops)
+define i32 @min_of_max_swap(i32 %a) {
+; CHECK-LABEL: @min_of_max_swap(
+; CHECK-NEXT: ret i32 -1
+;
+ %not_a = xor i32 %a, -1
+ %c0 = icmp sgt i32 %a, 0
+ %s0 = select i1 %c0, i32 -1, i32 %not_a
+ %c1 = icmp slt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
+
+define <2 x i32> @max_of_min_vec(<2 x i32> %a) {
+; CHECK-LABEL: @max_of_min_vec(
+; CHECK-NEXT: ret <2 x i32> <i32 -1, i32 -1>
+;
+ %not_a = xor <2 x i32> %a, <i32 -1, i32 -1>
+ %c0 = icmp sgt <2 x i32> %a, zeroinitializer
+ %s0 = select <2 x i1> %c0, <2 x i32> %not_a, <2 x i32> <i32 -1, i32 -1>
+ %c1 = icmp sgt <2 x i32> %s0, <i32 -1, i32 -1>
+ %s1 = select <2 x i1> %c1, <2 x i32> %s0, <2 x i32> <i32 -1, i32 -1>
+ ret <2 x i32> %s1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/max_known_bits.ll b/llvm/test/Transforms/InstCombine/max_known_bits.ll
new file mode 100644
index 00000000000..86c4d2530c3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/max_known_bits.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i16 @foo(i16 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[T1:%.*]] = and i16 [[X:%.*]], 255
+; CHECK-NEXT: ret i16 [[T1]]
+;
+ %t1 = and i16 %x, 255
+ %t2 = zext i16 %t1 to i32
+ %t3 = icmp ult i32 %t2, 255
+ %t4 = select i1 %t3, i32 %t2, i32 255
+ %t5 = trunc i32 %t4 to i16
+ %t6 = and i16 %t5, 255
+ ret i16 %t6
+}
+
+; This contains a min/max pair to clamp a value to 12 bits.
+; By analyzing the clamp pattern, we can tell the add doesn't have signed overflow.
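+; The clamp limits %d to [-2048, 2047], so %d + 1 lies in [-2047, 2048] and cannot
+; wrap in i16; the add is therefore expected to gain the 'nsw' flag.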
+define i16 @min_max_clamp(i16 %x) {
+; CHECK-LABEL: @min_max_clamp(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048
+; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048
+; CHECK-NEXT: [[C:%.*]] = icmp slt i16 [[B]], 2047
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047
+; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D]], 1
+; CHECK-NEXT: ret i16 [[E]]
+;
+ %a = icmp sgt i16 %x, -2048
+ %b = select i1 %a, i16 %x, i16 -2048
+ %c = icmp slt i16 %b, 2047
+ %d = select i1 %c, i16 %b, i16 2047
+ %e = add i16 %d, 1
+ ret i16 %e
+}
+
+; Same as above with min/max reversed.
+define i16 @min_max_clamp_2(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_2(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047
+; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i16 [[B]], -2048
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048
+; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D]], 1
+; CHECK-NEXT: ret i16 [[E]]
+;
+ %a = icmp slt i16 %x, 2047
+ %b = select i1 %a, i16 %x, i16 2047
+ %c = icmp sgt i16 %b, -2048
+ %d = select i1 %c, i16 %b, i16 -2048
+ %e = add i16 %d, 1
+ ret i16 %e
+}
+
+; This contains a min/max pair to clamp a value to 12 bits.
+; By analyzing the clamp pattern, we can tell that the second add doesn't
+; overflow the original type and can be moved before the extend.
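+; Once moved before the sext, the +1 and -1 cancel, leaving only the sext of the
+; clamped value.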
+define i32 @min_max_clamp_3(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_3(
+; CHECK-NEXT: [[A:%.*]] = icmp sgt i16 [[X:%.*]], -2048
+; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 -2048
+; CHECK-NEXT: [[C:%.*]] = icmp slt i16 [[B]], 2047
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 2047
+; CHECK-NEXT: [[G:%.*]] = sext i16 [[D]] to i32
+; CHECK-NEXT: ret i32 [[G]]
+;
+ %a = icmp sgt i16 %x, -2048
+ %b = select i1 %a, i16 %x, i16 -2048
+ %c = icmp slt i16 %b, 2047
+ %d = select i1 %c, i16 %b, i16 2047
+ %e = add i16 %d, 1
+ %f = sext i16 %e to i32
+ %g = add i32 %f, -1
+ ret i32 %g
+}
+
+; Same as above with min/max order reversed
+define i32 @min_max_clamp_4(i16 %x) {
+; CHECK-LABEL: @min_max_clamp_4(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i16 [[X:%.*]], 2047
+; CHECK-NEXT: [[B:%.*]] = select i1 [[A]], i16 [[X]], i16 2047
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i16 [[B]], -2048
+; CHECK-NEXT: [[D:%.*]] = select i1 [[C]], i16 [[B]], i16 -2048
+; CHECK-NEXT: [[G:%.*]] = sext i16 [[D]] to i32
+; CHECK-NEXT: ret i32 [[G]]
+;
+ %a = icmp slt i16 %x, 2047
+ %b = select i1 %a, i16 %x, i16 2047
+ %c = icmp sgt i16 %b, -2048
+ %d = select i1 %c, i16 %b, i16 -2048
+ %e = add i16 %d, 1
+ %f = sext i16 %e to i32
+ %g = add i32 %f, -1
+ ret i32 %g
+}
diff --git a/llvm/test/Transforms/InstCombine/maximum.ll b/llvm/test/Transforms/InstCombine/maximum.ll
new file mode 100644
index 00000000000..bd97a3794d4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/maximum.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.maximum.f32(float, float)
+declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+
+declare double @llvm.maximum.f64(double, double)
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+
+define float @constant_fold_maximum_f32() {
+; CHECK-LABEL: @constant_fold_maximum_f32(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float 1.0, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_inv() {
+; CHECK-LABEL: @constant_fold_maximum_f32_inv(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float 2.0, float 1.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_nan0() {
+; CHECK-LABEL: @constant_fold_maximum_f32_nan0(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.maximum.f32(float 0x7FF8000000000000, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_nan1() {
+; CHECK-LABEL: @constant_fold_maximum_f32_nan1(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.maximum.f32(float 2.0, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_nan_nan() {
+; CHECK-LABEL: @constant_fold_maximum_f32_nan_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.maximum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_p0_p0() {
+; CHECK-LABEL: @constant_fold_maximum_f32_p0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float 0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_p0_n0() {
+; CHECK-LABEL: @constant_fold_maximum_f32_p0_n0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float 0.0, float -0.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_n0_p0() {
+; CHECK-LABEL: @constant_fold_maximum_f32_n0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float -0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_maximum_f32_n0_n0() {
+; CHECK-LABEL: @constant_fold_maximum_f32_n0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.maximum.f32(float -0.0, float -0.0)
+ ret float %x
+}
+
+define <4 x float> @constant_fold_maximum_v4f32() {
+; CHECK-LABEL: @constant_fold_maximum_v4f32(
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 8.000000e+00, float 1.000000e+01, float 9.000000e+00>
+;
+ %x = call <4 x float> @llvm.maximum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+define double @constant_fold_maximum_f64() {
+; CHECK-LABEL: @constant_fold_maximum_f64(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.maximum.f64(double 1.0, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_maximum_f64_nan0() {
+; CHECK-LABEL: @constant_fold_maximum_f64_nan0(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.maximum.f64(double 0x7FF8000000000000, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_maximum_f64_nan1() {
+; CHECK-LABEL: @constant_fold_maximum_f64_nan1(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.maximum.f64(double 2.0, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define double @constant_fold_maximum_f64_nan_nan() {
+; CHECK-LABEL: @constant_fold_maximum_f64_nan_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.maximum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define float @canonicalize_constant_maximum_f32(float %x) {
+; CHECK-LABEL: @canonicalize_constant_maximum_f32(
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[Y]]
+;
+ %y = call float @llvm.maximum.f32(float 1.0, float %x)
+ ret float %y
+}
+
+define float @maximum_f32_nan_val(float %x) {
+; CHECK-LABEL: @maximum_f32_nan_val(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %y = call float @llvm.maximum.f32(float 0x7FF8000000000000, float %x)
+ ret float %y
+}
+
+define float @maximum_f32_val_nan(float %x) {
+; CHECK-LABEL: @maximum_f32_val_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %y = call float @llvm.maximum.f32(float %x, float 0x7FF8000000000000)
+ ret float %y
+}
+
+define float @maximum_f32_1_maximum_val_p0(float %x) {
+; CHECK-LABEL: @maximum_f32_1_maximum_val_p0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maximum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maximum.f32(float %x, float 0.0)
+ %z = call float @llvm.maximum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maximum_f32_1_maximum_p0_val_fast(float %x) {
+; CHECK-LABEL: @maximum_f32_1_maximum_p0_val_fast(
+; CHECK-NEXT: [[RES:%.*]] = call fast float @llvm.maximum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maximum.f32(float 0.0, float %x)
+ %z = call fast float @llvm.maximum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maximum_f32_1_maximum_p0_val_nnan_ninf(float %x) {
+; CHECK-LABEL: @maximum_f32_1_maximum_p0_val_nnan_ninf(
+; CHECK-NEXT: [[RES:%.*]] = call nnan ninf float @llvm.maximum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maximum.f32(float 0.0, float %x)
+ %z = call nnan ninf float @llvm.maximum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maximum_f32_p0_maximum_val_n0(float %x) {
+; CHECK-LABEL: @maximum_f32_p0_maximum_val_n0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maximum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maximum.f32(float %x, float -0.0)
+ %z = call float @llvm.maximum.f32(float %y, float 0.0)
+ ret float %z
+}
+
+define float @maximum_f32_1_maximum_p0_val(float %x) {
+; CHECK-LABEL: @maximum_f32_1_maximum_p0_val(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maximum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maximum.f32(float 0.0, float %x)
+ %z = call float @llvm.maximum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define <2 x float> @maximum_f32_1_maximum_val_p0_val_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @maximum_f32_1_maximum_val_p0_val_v2f32(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT: ret <2 x float> [[RES]]
+ %y = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+ %z = call <2 x float> @llvm.maximum.v2f32(<2 x float> %y, <2 x float><float 1.0, float 1.0>)
+ ret <2 x float> %z
+}
+
+define float @maximum4(float %x, float %y, float %z, float %w) {
+; CHECK-LABEL: @maximum4(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.maximum.f32(float [[Z:%.*]], float [[W:%.*]])
+; CHECK-NEXT: [[C:%.*]] = call float @llvm.maximum.f32(float [[A]], float [[B]])
+; CHECK-NEXT: ret float [[C]]
+;
+ %a = call float @llvm.maximum.f32(float %x, float %y)
+ %b = call float @llvm.maximum.f32(float %z, float %w)
+ %c = call float @llvm.maximum.f32(float %a, float %b)
+ ret float %c
+}
+
+; PR37404 - https://bugs.llvm.org/show_bug.cgi?id=37404
+
+define <2 x float> @neg_neg(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negx = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %negy = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %negx, <2 x float> %negy)
+ ret <2 x float> %r
+}
+
+; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
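+; The fnegs below only carry 'arcp'/'afn', while the maximum call is 'fast'; the
+; resulting minimum and fsub are expected to inherit 'fast' from the call.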
+
+define float @neg_neg_vec_fmf(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_vec_fmf(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub arcp float -0.0, %x
+ %negy = fsub afn float -0.0, %y
+ %r = call fast float @llvm.maximum.f32(float %negx, float %negy)
+ ret float %r
+}
+
+; 1 extra use of an intermediate value should still allow the fold,
+; but 2 would require more instructions than we started with.
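+; With both fnegs kept alive by extra uses, the rewrite would add a minimum call
+; and a trailing fsub while removing only the maximum, so @neg_neg_extra_use_x_and_y
+; below is left unchanged.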
+
+declare void @use(float)
+define float @neg_neg_extra_use_x(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.minimum.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(float [[NEGX]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maximum.f32(float %negx, float %negy)
+ call void @use(float %negx)
+ ret float %r
+}
+
+define float @neg_neg_extra_use_y(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_y(
+; CHECK-NEXT: [[NEGY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y]])
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(float [[NEGY]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maximum.f32(float %negx, float %negy)
+ call void @use(float %negy)
+ ret float %r
+}
+
+define float @neg_neg_extra_use_x_and_y(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[NEGY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call float @llvm.maximum.f32(float [[NEGX]], float [[NEGY]])
+; CHECK-NEXT: call void @use(float [[NEGX]])
+; CHECK-NEXT: call void @use(float [[NEGY]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maximum.f32(float %negx, float %negy)
+ call void @use(float %negx)
+ call void @use(float %negy)
+ ret float %r
+}
diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll
new file mode 100644
index 00000000000..d81158c066f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/maxnum.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.maxnum.f32(float, float)
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+
+declare double @llvm.maxnum.f64(double, double)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+
+define float @constant_fold_maxnum_f32() {
+; CHECK-LABEL: @constant_fold_maxnum_f32(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 1.0, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_inv() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_inv(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 2.0, float 1.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_nan0() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan0(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_nan1() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan1(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 2.0, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_nan_nan() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_p0_p0() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_p0_n0() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_n0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float 0.0, float -0.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_n0_p0() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float -0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_maxnum_f32_n0_n0() {
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.maxnum.f32(float -0.0, float -0.0)
+ ret float %x
+}
+
+define <4 x float> @constant_fold_maxnum_v4f32() {
+; CHECK-LABEL: @constant_fold_maxnum_v4f32(
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 8.000000e+00, float 1.000000e+01, float 9.000000e+00>
+;
+ %x = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+define double @constant_fold_maxnum_f64() {
+; CHECK-LABEL: @constant_fold_maxnum_f64(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.maxnum.f64(double 1.0, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_maxnum_f64_nan0() {
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan0(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_maxnum_f64_nan1() {
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan1(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.maxnum.f64(double 2.0, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define double @constant_fold_maxnum_f64_nan_nan() {
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define float @canonicalize_constant_maxnum_f32(float %x) {
+; CHECK-LABEL: @canonicalize_constant_maxnum_f32(
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[Y]]
+;
+ %y = call float @llvm.maxnum.f32(float 1.0, float %x)
+ ret float %y
+}
+
+define float @maxnum_f32_nan_val(float %x) {
+; CHECK-LABEL: @maxnum_f32_nan_val(
+; CHECK-NEXT: ret float [[X:%.*]]
+;
+ %y = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %x)
+ ret float %y
+}
+
+define float @maxnum_f32_val_nan(float %x) {
+; CHECK-LABEL: @maxnum_f32_val_nan(
+; CHECK-NEXT: ret float [[X:%.*]]
+;
+ %y = call float @llvm.maxnum.f32(float %x, float 0x7FF8000000000000)
+ ret float %y
+}
+
+define float @maxnum_f32_1_maxnum_val_p0(float %x) {
+; CHECK-LABEL: @maxnum_f32_1_maxnum_val_p0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maxnum.f32(float %x, float 0.0)
+ %z = call float @llvm.maxnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maxnum_f32_1_maxnum_p0_val_fast(float %x) {
+; CHECK-LABEL: @maxnum_f32_1_maxnum_p0_val_fast(
+; CHECK-NEXT: [[RES:%.*]] = call fast float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maxnum.f32(float 0.0, float %x)
+ %z = call fast float @llvm.maxnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maxnum_f32_1_maxnum_p0_val_nnan_ninf(float %x) {
+; CHECK-LABEL: @maxnum_f32_1_maxnum_p0_val_nnan_ninf(
+; CHECK-NEXT: [[RES:%.*]] = call nnan ninf float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maxnum.f32(float 0.0, float %x)
+ %z = call nnan ninf float @llvm.maxnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @maxnum_f32_p0_maxnum_val_n0(float %x) {
+; CHECK-LABEL: @maxnum_f32_p0_maxnum_val_n0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maxnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maxnum.f32(float %x, float -0.0)
+ %z = call float @llvm.maxnum.f32(float %y, float 0.0)
+ ret float %z
+}
+
+define float @maxnum_f32_1_maxnum_p0_val(float %x) {
+; CHECK-LABEL: @maxnum_f32_1_maxnum_p0_val(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.maxnum.f32(float 0.0, float %x)
+ %z = call float @llvm.maxnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define <2 x float> @maxnum_f32_1_maxnum_val_p0_val_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @maxnum_f32_1_maxnum_val_p0_val_v2f32(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT: ret <2 x float> [[RES]]
+ %y = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+ %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %y, <2 x float><float 1.0, float 1.0>)
+ ret <2 x float> %z
+}
+
+define float @maxnum4(float %x, float %y, float %z, float %w) {
+; CHECK-LABEL: @maxnum4(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[Z:%.*]], float [[W:%.*]])
+; CHECK-NEXT: [[C:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[B]])
+; CHECK-NEXT: ret float [[C]]
+;
+ %a = call float @llvm.maxnum.f32(float %x, float %y)
+ %b = call float @llvm.maxnum.f32(float %z, float %w)
+ %c = call float @llvm.maxnum.f32(float %a, float %b)
+ ret float %c
+}
+
+; PR37404 - https://bugs.llvm.org/show_bug.cgi?id=37404
+
+define <2 x float> @neg_neg(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %negx = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %negy = fsub <2 x float> <float -0.0, float -0.0>, %y
+ %r = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %negx, <2 x float> %negy)
+ ret <2 x float> %r
+}
+
+; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
+
+define float @neg_neg_vec_fmf(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_vec_fmf(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub arcp float -0.0, %x
+ %negy = fsub afn float -0.0, %y
+ %r = call fast float @llvm.maxnum.f32(float %negx, float %negy)
+ ret float %r
+}
+
+; 1 extra use of an intermediate value should still allow the fold,
+; but 2 would require more instructions than we started with.
+
+declare void @use(float)
+define float @neg_neg_extra_use_x(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(float [[NEGX]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maxnum.f32(float %negx, float %negy)
+ call void @use(float %negx)
+ ret float %r
+}
+
+define float @neg_neg_extra_use_y(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_y(
+; CHECK-NEXT: [[NEGY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y]])
+; CHECK-NEXT: [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(float [[NEGY]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maxnum.f32(float %negx, float %negy)
+ call void @use(float %negy)
+ ret float %r
+}
+
+define float @neg_neg_extra_use_x_and_y(float %x, float %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[NEGY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call float @llvm.maxnum.f32(float [[NEGX]], float [[NEGY]])
+; CHECK-NEXT: call void @use(float [[NEGX]])
+; CHECK-NEXT: call void @use(float [[NEGY]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %negx = fsub float -0.0, %x
+ %negy = fsub float -0.0, %y
+ %r = call float @llvm.maxnum.f32(float %negx, float %negy)
+ call void @use(float %negx)
+ call void @use(float %negy)
+ ret float %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
new file mode 100644
index 00000000000..4499051b255
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+@f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+@f.c = linkonce unnamed_addr alias [1 x i32], [1 x i32]* @f.b
+
+define signext i32 @test1(i32 signext %x) #0 {
+entry:
+ %idxprom = sext i32 %x to i64
+ %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @f.a, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 12
+}
+
+declare void @foo(i64* %p)
+define void @test2(i32 signext %x, i64 %v) #0 {
+entry:
+ %p = alloca i64
+ %idxprom = sext i32 %x to i64
+ %arrayidx = getelementptr inbounds i64, i64* %p, i64 %idxprom
+ store i64 %v, i64* %arrayidx
+ call void @foo(i64* %p)
+ ret void
+
+; CHECK-LABEL: @test2
+; CHECK: %p = alloca i64
+; CHECK: store i64 %v, i64* %p
+; CHECK: ret void
+}
+
+define signext i32 @test3(i32 signext %x, i1 %y) #0 {
+entry:
+ %idxprom = sext i32 %x to i64
+ %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
+ %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* %p, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @test3
+; CHECK: getelementptr inbounds [1 x i32], [1 x i32]* %p, i64 0, i64 0
+}
+
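+; @f.c is a linkonce (interposable) alias, so its definition cannot be relied on and the index is not folded.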
+define signext i32 @test4(i32 signext %x, i1 %y) #0 {
+entry:
+ %idxprom = sext i32 %x to i64
+ %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @f.c, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @test4
+; CHECK: getelementptr inbounds [1 x i32], [1 x i32]* @f.c, i64 0, i64 %idxprom
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
new file mode 100644
index 00000000000..54fe6cb8495
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; Make sure the llvm.access.group metadata is preserved
+; when a memcpy is replaced with a load+store by instcombine.
+;
+; #include <string.h>
+; void test(char* out, long size)
+; {
+; #pragma clang loop vectorize(assume_safety)
+; for (long i = 0; i < size; i+=2) {
+; memcpy(&(out[i]), &(out[i+size]), 2);
+; }
+; }
+
+; CHECK: for.body:
+; CHECK: %{{.*}} = load i16, i16* %{{.*}}, align 1, !llvm.access.group !1
+; CHECK: store i16 %{{.*}}, i16* %{{.*}}, align 1, !llvm.access.group !1
+
+
+; ModuleID = '<stdin>'
+source_filename = "memcpy.pragma.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @_Z4testPcl(i8* %out, i64 %size) #0 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %add2, %for.inc ]
+ %cmp = icmp slt i64 %i.0, %size
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr inbounds i8, i8* %out, i64 %i.0
+ %add = add nsw i64 %i.0, %size
+ %arrayidx1 = getelementptr inbounds i8, i8* %out, i64 %add
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %arrayidx1, i64 2, i1 false), !llvm.access.group !4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %add2 = add nsw i64 %i.0, 2
+ br label %for.cond, !llvm.loop !2
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.0 (cfe/trunk 277751)"}
+!1 = distinct !{!1, !2, !3, !{!"llvm.loop.parallel_accesses", !4}}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}
+!4 = distinct !{} ; access group
diff --git a/llvm/test/Transforms/InstCombine/memchr.ll b/llvm/test/Transforms/InstCombine/memchr.ll
new file mode 100644
index 00000000000..83073e20676
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memchr.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the memchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@hellonull = constant [14 x i8] c"hello\00world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@newlines = constant [3 x i8] c"\0D\0A\00"
+@single = constant [2 x i8] c"\1F\00"
+@spaces = constant [4 x i8] c" \0D\0A\00"
+@negative = constant [3 x i8] c"\FF\FE\00"
+@chp = global i8* zeroinitializer
+
+declare i8* @memchr(i8*, i32, i32)
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %str, i32 119, i32 14)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %str, i32 119, i32 1)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %src, i32 0, i32 14)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test4(i32 %chr) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 0), i32 [[CHR:%.*]], i32 14)
+; CHECK-NEXT: store i8* [[DST]], i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %src, i32 %chr, i32 14)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
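+; The character argument is truncated to an unsigned char, so 65280 (0xFF00) behaves like 0 and matches the terminating nul at index 13.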
+define void @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %src, i32 65280, i32 14)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+; The length (100) overruns the 14-byte string, but 'w' is still found in bounds at index 6.
+ %dst = call i8* @memchr(i8* %src, i32 119, i32 100)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+; The length overruns the string and 'x' is not present, so the result folds to null.
+ %dst = call i8* @memchr(i8* %src, i32 120, i32 100)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %str = getelementptr [14 x i8], [14 x i8]* @hellonull, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %str, i32 119, i32 14)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %str = getelementptr [14 x i8], [14 x i8]* @hellonull, i32 0, i32 2
+ %dst = call i8* @memchr(i8* %str, i32 119, i32 12)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @memchr(i8* %str, i32 119, i32 6)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+; Check the transformation memchr("\r\n", C, 2) != nullptr -> ((1 << C) & 9216) != 0, guarded by a bounds check on C.
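+; 9216 = 0x2400 = (1 << 13) | (1 << 10), i.e. the bit positions of '\r' (13) and '\n' (10).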
+define i1 @test11(i32 %C) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 255
+; CHECK-NEXT: [[MEMCHR_BOUNDS:%.*]] = icmp ult i16 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = shl i16 1, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP3]], 9216
+; CHECK-NEXT: [[MEMCHR_BITS:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[MEMCHR:%.*]] = and i1 [[MEMCHR_BOUNDS]], [[MEMCHR_BITS]]
+; CHECK-NEXT: ret i1 [[MEMCHR]]
+;
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @newlines, i64 0, i64 0), i32 %C, i32 2)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
+
+; Not transformed: the largest character is ' ' (32), so the mask would need more than 32 bits, and this datalayout has no native 64-bit integer.
+define i1 @test12(i32 %C) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i32 0, i32 0), i32 [[C:%.*]], i32 3)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i64 0, i64 0), i32 %C, i32 3)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
+
+define i1 @test13(i32 %C) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], 255
+; CHECK-NEXT: [[MEMCHR_BOUNDS:%.*]] = icmp ult i32 [[TMP1]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 1, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], -2147483647
+; CHECK-NEXT: [[MEMCHR_BITS:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[MEMCHR:%.*]] = and i1 [[MEMCHR_BOUNDS]], [[MEMCHR_BITS]]
+; CHECK-NEXT: ret i1 [[MEMCHR]]
+;
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 2)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
+
+define i1 @test14(i32 %C) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[C:%.*]], 255
+; CHECK-NEXT: [[MEMCHR_BITS:%.*]] = icmp eq i32 [[TMP1]], 31
+; CHECK-NEXT: ret i1 [[MEMCHR_BITS]]
+;
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 1)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
+
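+; '\FF' (255) would need a 256-bit mask, so this call is not transformed either.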
+define i1 @test15(i32 %C) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i32 0, i32 0), i32 [[C:%.*]], i32 3)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i64 0, i64 0), i32 %C, i32 3)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
+
+@s = internal constant [1 x i8] [i8 0], align 1
+define i8* @pr32124() {
+; CHECK-LABEL: @pr32124(
+; CHECK-NEXT: ret i8* getelementptr inbounds ([1 x i8], [1 x i8]* @s, i32 0, i32 0)
+;
+ %res = tail call i8* @memchr(i8* getelementptr ([1 x i8], [1 x i8]* @s, i64 0, i64 0), i32 0, i32 1)
+ ret i8* %res
+}
diff --git a/llvm/test/Transforms/InstCombine/memcmp-1.ll b/llvm/test/Transforms/InstCombine/memcmp-1.ll
new file mode 100644
index 00000000000..a82f861697c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcmp-1.ll
@@ -0,0 +1,151 @@
+; Test that the memcmp library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck --check-prefix=CHECK --check-prefix=NOBCMP %s
+; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix=CHECK --check-prefix=BCMP %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
+
+@foo = constant [4 x i8] c"foo\00"
+@hel = constant [4 x i8] c"hel\00"
+@hello_u = constant [8 x i8] c"hello_u\00"
+
+declare i32 @memcmp(i8*, i8*, i32)
+
+; Check memcmp(mem, mem, size) -> 0.
+
+define i32 @test_simplify1(i8* %mem, i32 %size) {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i32 0
+;
+ %ret = call i32 @memcmp(i8* %mem, i8* %mem, i32 %size)
+ ret i32 %ret
+}
+
+; Check memcmp(mem1, mem2, 0) -> 0.
+
+define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i32 0
+;
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 0)
+ ret i32 %ret
+}
+
+;; Check memcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2.
+
+define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: [[LHSC:%.*]] = load i8, i8* %mem1, align 1
+; CHECK-NEXT: [[LHSV:%.*]] = zext i8 [[LHSC]] to i32
+; CHECK-NEXT: [[RHSC:%.*]] = load i8, i8* %mem2, align 1
+; CHECK-NEXT: [[RHSV:%.*]] = zext i8 [[RHSC]] to i32
+; CHECK-NEXT: [[CHARDIFF:%.*]] = sub nsw i32 [[LHSV]], [[RHSV]]
+; CHECK-NEXT: ret i32 [[CHARDIFF]]
+;
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1)
+ ret i32 %ret
+}
+
+; Check memcmp(mem1, mem2, size) -> cnst, where all arguments are constants.
+
+define i32 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i32 0
+;
+ %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+ %mem2 = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+}
+
+define i32 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i32 1
+;
+ %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+ %mem2 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+}
+
+define i32 @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: ret i32 -1
+;
+ %mem1 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
+ %mem2 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+}
+
+; Check memcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
+
+define i1 @test_simplify7(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.addr = alloca i64, align 8
+ %y.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ store i64 %y, i64* %y.addr, align 8
+ %xptr = bitcast i64* %x.addr to i8*
+ %yptr = bitcast i64* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 8)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
+
+define i1 @test_simplify8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %xptr = bitcast i32* %x.addr to i8*
+ %yptr = bitcast i32* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 4)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
+
+define i1 @test_simplify9(i16 %x, i16 %y) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %x.addr = alloca i16, align 2
+ %y.addr = alloca i16, align 2
+ store i16 %x, i16* %x.addr, align 2
+ store i16 %y, i16* %y.addr, align 2
+ %xptr = bitcast i16* %x.addr to i8*
+ %yptr = bitcast i16* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, size)==0 -> bcmp(mem1, mem2, size)==0
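+; The bcmp form is only used when the target's library info provides bcmp; here the second RUN line selects it via the x86_64-unknown-linux-gnu triple.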
+
+define i1 @test_simplify10(i8* %mem1, i8* %mem2, i32 %size) {
+; NOBCMP-LABEL: @test_simplify10(
+; NOBCMP-NEXT: [[CALL:%.*]] = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
+; NOBCMP-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; NOBCMP-NEXT: ret i1 [[CMP]]
+;
+; BCMP-LABEL: @test_simplify10(
+; BCMP-NEXT: [[CALL:%.*]] = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 %size)
+; BCMP-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; BCMP-NEXT: ret i1 [[CMP]]
+;
+ %call = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 %size)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/memcmp-2.ll b/llvm/test/Transforms/InstCombine/memcmp-2.ll
new file mode 100644
index 00000000000..bed62eb3fb9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcmp-2.ll
@@ -0,0 +1,17 @@
+; Test that the memcmp library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32* @memcmp(i8*, i8*, i32)
+
+; Check that memcmp functions with the wrong prototype aren't simplified.
+
+define i32* @test_no_simplify1(i8* %mem, i32 %size) {
+; CHECK-LABEL: @test_no_simplify1(
+ %ret = call i32* @memcmp(i8* %mem, i8* %mem, i32 %size)
+; CHECK-NEXT: call i32* @memcmp
+ ret i32* %ret
+; CHECK-NEXT: ret i32* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll
new file mode 100644
index 00000000000..211b3b5ab2c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcmp-constant-fold.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -instcombine -S -data-layout=e-n32 | FileCheck %s --check-prefix=ALL --check-prefix=LE
+; RUN: opt < %s -instcombine -S -data-layout=E-n32 | FileCheck %s --check-prefix=ALL --check-prefix=BE
+
+declare i32 @memcmp(i8*, i8*, i64)
+
+; The alignment of this constant does not matter. We constant fold the load.
+
+@charbuf = private unnamed_addr constant [4 x i8] [i8 0, i8 0, i8 0, i8 1], align 1
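+; The bytes 00 00 00 01 read back as 16777216 (0x01000000) on a little-endian target and as 1 on a big-endian target.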
+
+define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) {
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 16777216
+; LE-NEXT: ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT: [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 1
+; BE-NEXT: ret i1 [[TMP2]]
+;
+ %call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
+ %cmpeq0 = icmp eq i32 %call, 0
+ ret i1 %cmpeq0
+}
+
+; We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
+; It doesn't matter if the constant operand is the first operand to the memcmp.
+
+@intbuf_unaligned = private unnamed_addr constant [4 x i16] [i16 1, i16 2, i16 3, i16 4], align 1
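+; The first four bytes are 01 00 02 00 on a little-endian target (reads as 131073) and 00 01 00 02 on a big-endian target (reads as 65538).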
+
+define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) {
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 131073
+; LE-NEXT: ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT: [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 65538
+; BE-NEXT: ret i1 [[TMP2]]
+;
+ %call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4)
+ %cmpeq0 = icmp eq i32 %call, 0
+ ret i1 %cmpeq0
+}
+
+; TODO: Any memcmp where all arguments are constants should be constant folded. Currently, we only handle i8 array constants.
+
+@intbuf = private unnamed_addr constant [2 x i32] [i32 0, i32 1], align 4
+
+define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
+; ALL-LABEL: @memcmp_3bytes_aligned_constant_i32(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i64 0, i64 1) to i8*), i8* bitcast ([2 x i32]* @intbuf to i8*), i64 3)
+; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: ret i1 [[CMPEQ0]]
+;
+ %call = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i64 0, i64 1) to i8*), i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i64 0, i64 0) to i8*), i64 3)
+ %cmpeq0 = icmp eq i32 %call, 0
+ ret i1 %cmpeq0
+}
+
+; A sloppy implementation could loop forever by recreating the unused instructions.
+
+define i1 @memcmp_4bytes_one_unaligned_i8(i8* align 4 %x, i8* align 1 %y) {
+; ALL-LABEL: @memcmp_4bytes_one_unaligned_i8(
+; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT: [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: ret i1 [[CMPEQ0]]
+;
+ %bc = bitcast i8* %x to i32*
+ %lhsv = load i32, i32* %bc
+ %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+ %cmpeq0 = icmp eq i32 %call, 0
+ ret i1 %cmpeq0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/memcpy-1.ll b/llvm/test/Transforms/InstCombine/memcpy-1.ll
new file mode 100644
index 00000000000..dceb7c39286
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-1.ll
@@ -0,0 +1,28 @@
+; Test that the memcpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memcpy(i8*, i8*, i32)
+
+; Check memcpy(mem1, mem2, size) -> llvm.memcpy(mem1, mem2, size, 1).
+
+define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %mem1, i8* align 1 %mem2, i32 %size, i1 false)
+; CHECK-NEXT: ret i8* %mem1
+;
+ %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size)
+ ret i8* %ret
+}
+
+; Verify that the strictfp attr doesn't block this optimization.
+
+define i8* @test_simplify2(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_simplify2(
+ %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size) strictfp
+; CHECK: call void @llvm.memcpy
+ ret i8* %ret
+; CHECK: ret i8* %mem1
+}
diff --git a/llvm/test/Transforms/InstCombine/memcpy-2.ll b/llvm/test/Transforms/InstCombine/memcpy-2.ll
new file mode 100644
index 00000000000..12c68965b35
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-2.ll
@@ -0,0 +1,18 @@
+; Test that the memcpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memcpy(i8*, i8*, i32)
+
+; Check that memcpy functions with the wrong prototype (doesn't return a pointer) aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[RET:%.*]] = call i8 @memcpy(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %ret = call i8 @memcpy(i8* %mem1, i8* %mem2, i32 %size)
+ ret i8 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/memcpy-addrspace.ll b/llvm/test/Transforms/InstCombine/memcpy-addrspace.ll
new file mode 100644
index 00000000000..b57a24e58b0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-addrspace.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@test.data = private unnamed_addr addrspace(2) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 4
+
+; CHECK-LABEL: test_load
+; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
+; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
+; CHECK-NOT: alloca
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32*
+define void @test_load(i32 addrspace(1)* %out, i64 %x) {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_load_bitcast_chain
+; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x
+; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]]
+; CHECK-NOT: alloca
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32*
+define void @test_load_bitcast_chain(i32 addrspace(1)* %out, i64 %x) {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %1 = bitcast i8* %0 to i32*
+ %arrayidx = getelementptr inbounds i32, i32* %1, i64 %x
+ %2 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %2, i32 addrspace(1)* %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_call
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK: call i32 @foo(i32* nonnull %{{.*}})
+define void @test_call(i32 addrspace(1)* %out, i64 %x) {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+ %1 = call i32 @foo(i32* %arrayidx)
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_call_no_null_opt
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK-NOT: addrspacecast
+; CHECK: call i32 @foo(i32* %{{.*}})
+define void @test_call_no_null_opt(i32 addrspace(1)* %out, i64 %x) #0 {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+ %1 = call i32 @foo(i32* %arrayidx)
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_load_and_call
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK: load i32, i32* %{{.*}}
+; CHECK: call i32 @foo(i32* nonnull %{{.*}})
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32 addrspace(2)*
+define void @test_load_and_call(i32 addrspace(1)* %out, i64 %x, i64 %y) {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+ %2 = call i32 @foo(i32* %arrayidx)
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y
+ store i32 %2, i32 addrspace(1)* %arrayidx2, align 4
+ ret void
+}
+
+; CHECK-LABEL: test_load_and_call_no_null_opt
+; CHECK: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64
+; CHECK: load i32, i32* %{{.*}}
+; CHECK: call i32 @foo(i32* %{{.*}})
+; CHECK-NOT: addrspacecast
+; CHECK-NOT: load i32, i32 addrspace(2)*
+define void @test_load_and_call_no_null_opt(i32 addrspace(1)* %out, i64 %x, i64 %y) #0 {
+entry:
+ %data = alloca [8 x i32], align 4
+ %0 = bitcast [8 x i32]* %data to i8*
+ call void @llvm.memcpy.p0i8.p2i8.i64(i8* align 4 %0, i8 addrspace(2)* align 4 bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i1 false)
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x
+ store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
+ %2 = call i32 @foo(i32* %arrayidx)
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y
+ store i32 %2, i32 addrspace(1)* %arrayidx2, align 4
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i1)
+declare i32 @foo(i32* %x)
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
new file mode 100644
index 00000000000..c14b1a70fd0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -0,0 +1,259 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+@C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <[128 x float]*> [#uses=1]
+
+define float @test1(i32 %hash, float %x, float %y, float %z, float %w) {
+entry:
+ %lookupTable = alloca [128 x float], align 16 ; <[128 x float]*> [#uses=5]
+ %lookupTable1 = bitcast [128 x float]* %lookupTable to i8* ; <i8*> [#uses=1]
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %lookupTable1, i8* align 16 bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i1 false)
+
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+; CHECK-NOT: call{{.*}}@llvm.memcpy
+
+ %tmp3 = shl i32 %hash, 2 ; <i32> [#uses=1]
+ %tmp5 = and i32 %tmp3, 124 ; <i32> [#uses=4]
+ %tmp753 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp5 ; <float*> [#uses=1]
+ %tmp9 = load float, float* %tmp753 ; <float> [#uses=1]
+ %tmp11 = fmul float %tmp9, %x ; <float> [#uses=1]
+ %tmp13 = fadd float %tmp11, 0.000000e+00 ; <float> [#uses=1]
+ %tmp17.sum52 = or i32 %tmp5, 1 ; <i32> [#uses=1]
+ %tmp1851 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp17.sum52 ; <float*> [#uses=1]
+ %tmp19 = load float, float* %tmp1851 ; <float> [#uses=1]
+ %tmp21 = fmul float %tmp19, %y ; <float> [#uses=1]
+ %tmp23 = fadd float %tmp21, %tmp13 ; <float> [#uses=1]
+ %tmp27.sum50 = or i32 %tmp5, 2 ; <i32> [#uses=1]
+ %tmp2849 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp27.sum50 ; <float*> [#uses=1]
+ %tmp29 = load float, float* %tmp2849 ; <float> [#uses=1]
+ %tmp31 = fmul float %tmp29, %z ; <float> [#uses=1]
+ %tmp33 = fadd float %tmp31, %tmp23 ; <float> [#uses=1]
+ %tmp37.sum48 = or i32 %tmp5, 3 ; <i32> [#uses=1]
+ %tmp3847 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp37.sum48 ; <float*> [#uses=1]
+ %tmp39 = load float, float* %tmp3847 ; <float> [#uses=1]
+ %tmp41 = fmul float %tmp39, %w ; <float> [#uses=1]
+ %tmp43 = fadd float %tmp41, %tmp33 ; <float> [#uses=1]
+ ret float %tmp43
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+
+%T = type { i8, [123 x i8] }
+%U = type { i32, i32, i32, i32, i32 }
+
+@G = constant %T {i8 1, [123 x i8] zeroinitializer }
+@H = constant [2 x %U] zeroinitializer, align 16
+
+define void @test2() {
+ %A = alloca %T
+ %B = alloca %T
+ %a = bitcast %T* %A to i8*
+ %b = bitcast %T* %B to i8*
+
+; CHECK-LABEL: @test2(
+
+; %A alloca is deleted
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr inbounds [124 x i8], [124 x i8]*
+
+; use @G instead of %A
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %{{.*}}, i8* align 16 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %b, i8* align 4 %a, i64 124, i1 false)
+ call void @bar(i8* %b)
+ ret void
+}
+
+define void @test2_no_null_opt() #0 {
+ %A = alloca %T
+ %B = alloca %T
+ %a = bitcast %T* %A to i8*
+ %b = bitcast %T* %B to i8*
+
+; CHECK-LABEL: @test2_no_null_opt(
+
+; %A alloca is deleted
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr inbounds [124 x i8], [124 x i8]*
+
+; use @G instead of %A
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 16 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %b, i8* align 4 %a, i64 124, i1 false)
+ call void @bar(i8* %b)
+ ret void
+}
+
+define void @test2_addrspacecast() {
+ %A = alloca %T
+ %B = alloca %T
+ %a = addrspacecast %T* %A to i8 addrspace(1)*
+ %b = addrspacecast %T* %B to i8 addrspace(1)*
+
+; CHECK-LABEL: @test2_addrspacecast(
+
+; %A alloca is deleted
+; This doesn't exactly match what test2 does, because folding the type
+; cast into the alloca doesn't work for the addrspacecast yet.
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: addrspacecast
+
+; use @G instead of %A
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %{{.*}},
+ call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %b, i8 addrspace(1)* align 4 %a, i64 124, i1 false)
+ call void @bar_as1(i8 addrspace(1)* %b)
+ ret void
+}
+
+declare void @bar(i8*)
+declare void @bar_as1(i8 addrspace(1)*)
+
+
+;; Should be able to eliminate the alloca.
+define void @test3() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
+ ret void
+}
+
+define void @test3_addrspacecast() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test3_addrspacecast(
+; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
+ ret void
+}
+
+
+define void @test4() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @baz(i8* byval %a)
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
+ ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8*)
+define void @test5() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%T* @G to i8*), i64 124, i1 false)
+ call void @baz(i8* byval %a)
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
+ ret void
+}
+
+
+declare void @baz(i8* byval)
+
+
+define void @test6() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 bitcast ([2 x %U]* @H to i8*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
+ ret void
+}
+
+define void @test7() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
+ ret void
+}
+
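+; Here the alloca requires 16-byte alignment, but the source is the second %U of @H (offset 20), which is only 4-byte aligned, so the copy is not elided.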
+define void @test8() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test8(
+; CHECK: llvm.memcpy
+; CHECK: bar
+ ret void
+}
+
+
+define void @test8_addrspacecast() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test8_addrspacecast(
+; CHECK: llvm.memcpy
+; CHECK: bar
+ ret void
+}
+
+define void @test9() {
+ %A = alloca %U, align 4
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
+ ret void
+}
+
+define void @test9_addrspacecast() {
+ %A = alloca %U, align 4
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK-LABEL: @test9_addrspacecast(
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
+ ret void
+}
+
+@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16
+@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1
+
+; Should not replace alloca with global because of size mismatch.
+define void @test9_small_global() {
+; CHECK-LABEL: @test9_small_global(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,
+; CHECK: alloca [1000000 x i8]
+entry:
+ %cc = alloca [1000000 x i8], align 16
+ %cc.0..sroa_idx = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* align 16 %arraydecay, i64 1000000, i1 false)
+ ret void
+}
+
+; Should replace alloca with global as they have exactly the same size.
+define void @test10_same_global() {
+; CHECK-LABEL: @test10_same_global(
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,{{.*}}, i64 3,
+entry:
+ %cc = alloca [3 x i8], align 1
+ %cc.0..sroa_idx = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 3, i1 false)
+ ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/memcpy-to-load.ll b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
new file mode 100644
index 00000000000..614ae186406
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=ALL --check-prefix=NODL
+; RUN: opt < %s -instcombine -S -data-layout=n32 | FileCheck %s --check-prefix=ALL --check-prefix=I32
+; RUN: opt < %s -instcombine -S -data-layout=n32:64 | FileCheck %s --check-prefix=ALL --check-prefix=I64
+; RUN: opt < %s -instcombine -S -data-layout=n32:64:128 | FileCheck %s --check-prefix=ALL --check-prefix=I128
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+; memcpy can be expanded inline with load/store (based on the datalayout?)
+
+define void @copy_1_byte(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_1_byte(
+; ALL-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; ALL-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 1, i1 false)
+ ret void
+}
+
+define void @copy_2_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_2_bytes(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i16*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i16*
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 1
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 2, i1 false)
+ ret void
+}
+
+; We don't expand small non-power-of-2. Should we? Might be a target-dependent choice.
+
+define void @copy_3_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_3_bytes(
+; ALL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i32 3, i1 false)
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 3, i1 false)
+ ret void
+}
+
+define void @copy_4_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_4_bytes(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i32*
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 1
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 4, i1 false)
+ ret void
+}
+
+; We don't expand small non-power-of-2. Should we? Might be a target-dependent choice.
+
+define void @copy_5_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_5_bytes(
+; ALL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i32 5, i1 false)
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 5, i1 false)
+ ret void
+}
+
+define void @copy_8_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_8_bytes(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; ALL-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 1
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 8, i1 false)
+ ret void
+}
+
+define void @copy_16_bytes(i8* %d, i8* %s) {
+; ALL-LABEL: @copy_16_bytes(
+; ALL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i32 16, i1 false)
+; ALL-NEXT: ret void
+;
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 16, i1 false)
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/memcpy.ll b/llvm/test/Transforms/InstCombine/memcpy.ll
new file mode 100644
index 00000000000..1adb815f612
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; Same src/dest.
+
+define void @test1(i8* %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
+ ret void
+}
+
+; PR8267 - same src/dest, but volatile.
+
+define void @test2(i8* %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[A:%.*]], i8* [[A]], i32 100, i1 true)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 true)
+ ret void
+}
+
+; 17179869184 == 0x400000000 - make sure that doesn't get truncated to 32-bit.
+
+define void @test3(i8* %d, i8* %s) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[D:%.*]], i8* align 4 [[S:%.*]], i64 17179869184, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %d, i8* align 4 %s, i64 17179869184, i1 false)
+ ret void
+}
+
+@UnknownConstant = external constant i128
+
+define void @memcpy_to_constant(i8* %src) {
+; CHECK-LABEL: @memcpy_to_constant(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (i128* @UnknownConstant to i8*), i8* align 1 [[SRC:%.*]], i32 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dest = bitcast i128* @UnknownConstant to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 16, i1 false)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
new file mode 100644
index 00000000000..a372ef2feca
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -0,0 +1,77 @@
+; Test lib call simplification of __memcpy_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+ %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+ %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret i8* %ret
+}
+
+; Check cases where dstlen < len.
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify2() {
+; CHECK-LABEL: @test_no_simplify2(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret i8* %ret
+}
+
+define i8* @test_simplify_return_indcall(i8* ()* %alloc) {
+; CHECK-LABEL: @test_simplify_return_indcall(
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: %dst = call i8* %alloc()
+ %dst = call i8* %alloc()
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+; CHECK-NEXT: ret i8* %dst
+ ret i8* %ret
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-2.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-2.ll
new file mode 100644
index 00000000000..320b54f82dc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcpy_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64)
diff --git a/llvm/test/Transforms/InstCombine/memmove-1.ll b/llvm/test/Transforms/InstCombine/memmove-1.ll
new file mode 100644
index 00000000000..0445a60aedd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memmove-1.ll
@@ -0,0 +1,17 @@
+; Test that the memmove library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memmove(i8*, i8*, i32)
+
+; Check memmove(mem1, mem2, size) -> llvm.memmove(mem1, mem2, size, 1).
+
+define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_simplify1(
+ %ret = call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call void @llvm.memmove
+ ret i8* %ret
+; CHECK: ret i8* %mem1
+}
diff --git a/llvm/test/Transforms/InstCombine/memmove-2.ll b/llvm/test/Transforms/InstCombine/memmove-2.ll
new file mode 100644
index 00000000000..b20e96bc555
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memmove-2.ll
@@ -0,0 +1,17 @@
+; Test that the memmove library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memmove(i8*, i8*, i32)
+
+; Check that memmove functions with the wrong prototype aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_no_simplify1(
+ %ret = call i8 @memmove(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call i8 @memmove
+ ret i8 %ret
+; CHECK: ret i8 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/memmove.ll b/llvm/test/Transforms/InstCombine/memmove.ll
new file mode 100644
index 00000000000..5c695959ccd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memmove.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that memmove instructions are properly eliminated.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1]
+@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
+@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1]
+@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1]
+
+define void @test1(i8* %A, i8* %B, i32 %N) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i1 false)
+ ret void
+}
+
+define void @test2(i8* %A, i32 %N) {
+ ;; dest can't alias source since we can't write to source!
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A:%.*]], i8* align 16 getelementptr inbounds ([33 x i8], [33 x i8]* @S, i64 0, i64 0), i32 [[N:%.*]], i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i1 false)
+ ret void
+}
+
+define i32 @test3([1024 x i8]* %target) { ; <[1024 x i8]*> [#uses=1]
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1024 x i8]* [[TARGET:%.*]] to i16*
+; CHECK-NEXT: store i16 104, i16* [[TMP1]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i32*
+; CHECK-NEXT: store i32 7103848, i32* [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i64*
+; CHECK-NEXT: store i64 33037504440198504, i64* [[TMP3]], align 8
+; CHECK-NEXT: ret i32 0
+;
+ %h_p = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0 ; <i8*> [#uses=1]
+ %hel_p = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0 ; <i8*> [#uses=1]
+ %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0 ; <i8*> [#uses=1]
+ %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=3]
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 %target_p, i8* align 2 %h_p, i32 2, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %target_p, i8* align 4 %hel_p, i32 4, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %target_p, i8* align 8 %hello_u_p, i32 8, i1 false)
+ ret i32 0
+}
+
+; PR2370
+define void @test4(i8* %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
+ ret void
+}
+
+@UnknownConstant = external constant i128
+
+define void @memmove_to_constant(i8* %src) {
+; CHECK-LABEL: @memmove_to_constant(
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 bitcast (i128* @UnknownConstant to i8*), i8* align 1 [[SRC:%.*]], i32 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %dest = bitcast i128* @UnknownConstant to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 16, i1 false)
+ ret void
+}
+
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) argmemonly nounwind
diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
new file mode 100644
index 00000000000..f006985305b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -0,0 +1,64 @@
+; Test lib call simplification of __memmove_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+ %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
+ %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret i8* %ret
+}
+
+; Check cases where dstlen < len.
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify2() {
+; CHECK-LABEL: @test_no_simplify2(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret i8* %ret
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-2.ll b/llvm/test/Transforms/InstCombine/memmove_chk-2.ll
new file mode 100644
index 00000000000..adadf905a58
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memmove_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memmove_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64)
diff --git a/llvm/test/Transforms/InstCombine/memset-1.ll b/llvm/test/Transforms/InstCombine/memset-1.ll
new file mode 100644
index 00000000000..7b6341d34b3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset-1.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the memset library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memset(i8*, i32, i32)
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
+declare noalias i8* @malloc(i32) #1
+
+; Check memset(mem1, val, size) -> llvm.memset(mem1, val, size, 1).
+
+define i8* @test_simplify1(i8* %mem, i32 %val, i32 %size) {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[VAL:%.*]] to i8
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[MEM:%.*]], i8 [[TMP1]], i32 [[SIZE:%.*]], i1 false)
+; CHECK-NEXT: ret i8* [[MEM]]
+;
+ %ret = call i8* @memset(i8* %mem, i32 %val, i32 %size)
+ ret i8* %ret
+}
+
+define i8* @pr25892_lite(i32 %size) #0 {
+; CHECK-LABEL: @pr25892_lite(
+; CHECK-NEXT: [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[SIZE:%.*]])
+; CHECK-NEXT: ret i8* [[CALLOC]]
+;
+ %call1 = call i8* @malloc(i32 %size) #1
+ %call2 = call i8* @memset(i8* %call1, i32 0, i32 %size) #1
+ ret i8* %call2
+}
+
+; FIXME: A memset intrinsic should be handled similarly to a memset() libcall.
+
+define i8* @malloc_and_memset_intrinsic(i32 %n) #0 {
+; CHECK-LABEL: @malloc_and_memset_intrinsic(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i32 [[N:%.*]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[CALL]], i8 0, i32 [[N]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @malloc(i32 %n)
+ call void @llvm.memset.p0i8.i32(i8* %call, i8 0, i32 %n, i32 1, i1 false)
+ ret i8* %call
+}
+
+; This should not create a calloc and should not crash the compiler.
+
+define i8* @notmalloc_memset(i32 %size, i8*(i32)* %notmalloc) {
+; CHECK-LABEL: @notmalloc_memset(
+; CHECK-NEXT: [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i32 [[SIZE:%.*]]) #0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[CALL1]], i8 0, i32 [[SIZE]], i1 false)
+; CHECK-NEXT: ret i8* [[CALL1]]
+;
+ %call1 = call i8* %notmalloc(i32 %size) #1
+ %call2 = call i8* @memset(i8* %call1, i32 0, i32 %size) #1
+ ret i8* %call2
+}
+
+; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
+; This doesn't fire currently because the malloc has more than one use.
+
+define float* @pr25892(i32 %size) #0 {
+; CHECK-LABEL: @pr25892(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i32 [[SIZE:%.*]]) #0
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 1 [[CALL]], i8 0, i32 [[SIZE]], i1 false)
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret float* [[RETVAL_0]]
+;
+entry:
+ %call = tail call i8* @malloc(i32 %size) #1
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ %call2 = tail call i8* @memset(i8* nonnull %call, i32 0, i32 %size) #1
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+}
+
+; If there's a calloc transform, the store must also be eliminated.
+
+define i8* @buffer_is_modified_then_memset(i32 %size) {
+; CHECK-LABEL: @buffer_is_modified_then_memset(
+; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 [[SIZE:%.*]]) #0
+; CHECK-NEXT: store i8 1, i8* [[PTR]], align 1
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[PTR]], i8 0, i32 [[SIZE]], i1 false)
+; CHECK-NEXT: ret i8* [[PTR]]
+;
+ %ptr = tail call i8* @malloc(i32 %size) #1
+ store i8 1, i8* %ptr ;; fdata[0] = 1;
+ %memset = tail call i8* @memset(i8* nonnull %ptr, i32 0, i32 %size) #1
+ ret i8* %memset
+}
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
+
diff --git a/llvm/test/Transforms/InstCombine/memset-2.ll b/llvm/test/Transforms/InstCombine/memset-2.ll
new file mode 100644
index 00000000000..5e446cb0ee0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset-2.ll
@@ -0,0 +1,17 @@
+; Test that the memset library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memset(i8*, i32, i32)
+
+; Check that memset functions with the wrong prototype aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem, i32 %val, i32 %size) {
+; CHECK-LABEL: @test_no_simplify1(
+ %ret = call i8 @memset(i8* %mem, i32 %val, i32 %size)
+; CHECK: call i8 @memset
+ ret i8 %ret
+; CHECK: ret i8 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/memset.ll b/llvm/test/Transforms/InstCombine/memset.ll
new file mode 100644
index 00000000000..1efb2e48a29
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test([1024 x i8]* %target) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[TARGET:%.*]], i64 0, i64 0
+; CHECK-NEXT: store i8 1, i8* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i16*
+; CHECK-NEXT: store i16 257, i16* [[TMP2]], align 2
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i32*
+; CHECK-NEXT: store i32 16843009, i32* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i64*
+; CHECK-NEXT: store i64 72340172838076673, i64* [[TMP4]], align 8
+; CHECK-NEXT: ret i32 0
+;
+ %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* align 2 %target_p, i8 1, i32 2, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* align 4 %target_p, i8 1, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* align 8 %target_p, i8 1, i32 8, i1 false)
+ ret i32 0
+}
+
+@Unknown = external constant i128
+
+define void @memset_to_constant() {
+; CHECK-LABEL: @memset_to_constant(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (i128* @Unknown to i8*), i8 0, i32 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %p = bitcast i128* @Unknown to i8*
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) argmemonly nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset2.ll b/llvm/test/Transforms/InstCombine/memset2.ll
new file mode 100644
index 00000000000..094cb6d536a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset2.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test to check that instcombine doesn't drop the address space when optimizing
+; memset.
+%struct.Moves = type { [9 x i8], i8, i8, i8, [5 x i8] }
+
+define i32 @test(%struct.Moves addrspace(1)* nocapture %moves) {
+entry:
+; CHECK: bitcast i8 addrspace(1)* %gep to i64 addrspace(1)*
+ %gep = getelementptr inbounds %struct.Moves, %struct.Moves addrspace(1)* %moves, i32 1, i32 0, i32 9
+ call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i1 false)
+ ret i32 0
+}
+
+declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
new file mode 100644
index 00000000000..71f95b02457
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
@@ -0,0 +1,130 @@
+; Test lib call simplification of __memset_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7719085
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+ %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+ %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
+ ret i8* %ret
+}
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
+ %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
+ ret i8* %ret
+}
+
+; Check cases where dstlen < len.
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 400)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify2() {
+; CHECK-LABEL: @test_no_simplify2(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 0)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
+ ret i8* %ret
+}
+
+; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
+define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
+; CHECK-LABEL: test_rauw
+entry:
+ %call49 = call i64 @strlen(i8* %a)
+ %add180 = add i64 %call49, 1
+ %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false, i1 false, i1 false)
+ %call50 = call i8* @__memmove_chk(i8* %b, i8* %a, i64 %add180, i64 %yo107)
+; CHECK: %strlen = call i64 @strlen(i8* %b)
+; CHECK-NEXT: %strchr2 = getelementptr i8, i8* %b, i64 %strlen
+ %call51i = call i8* @strrchr(i8* %b, i32 0)
+ %d = load i8*, i8** %c, align 8
+ %sub182 = ptrtoint i8* %d to i64
+ %sub183 = ptrtoint i8* %b to i64
+ %sub184 = sub i64 %sub182, %sub183
+ %add52.i.i = add nsw i64 %sub184, 1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %strchr2
+ %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
+ ret i32 4
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
+declare i8* @strrchr(i8*, i32)
+declare i64 @strlen(i8* nocapture)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
+
+; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
+
+define float* @pr25892(i64 %size) #0 {
+entry:
+ %call = tail call i8* @malloc(i64 %size) #1
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false, i1 false, i1 false)
+ %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2) #1
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+
+; CHECK-LABEL: @pr25892(
+; CHECK: entry:
+; CHECK-NEXT: %call = tail call i8* @malloc(i64 %size)
+; CHECK-NEXT: %cmp = icmp eq i8* %call, null
+; CHECK-NEXT: br i1 %cmp, label %cleanup, label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %bc = bitcast i8* %call to float*
+; CHECK-NEXT: %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false, i1 false, i1 false)
+; CHECK-NEXT: %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2)
+; CHECK-NEXT: br label %cleanup
+; CHECK: cleanup:
+; CHECK-NEXT: %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+; CHECK-NEXT: ret float* %retval.0
+}
+
+declare noalias i8* @malloc(i64) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
+
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-2.ll b/llvm/test/Transforms/InstCombine/memset_chk-2.ll
new file mode 100644
index 00000000000..bb4f772785e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memset_chk-2.ll
@@ -0,0 +1,20 @@
+; Test that lib call simplification doesn't simplify __memset_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64)
diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll
new file mode 100644
index 00000000000..6a65b5befa3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @test1(i16* %x) {
+ %load = load i16, i16* %x, align 4
+ %trunc = trunc i16 %load to i8
+ %cmp1 = icmp eq i8 %trunc, 127
+ %and = and i16 %load, -256
+ %cmp2 = icmp eq i16 %and, 17664
+ %or = and i1 %cmp1, %cmp2
+ ret i1 %or
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: load i16
+; CHECK-NEXT: icmp eq i16 %load, 17791
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test2(i16* %x) {
+ %load = load i16, i16* %x, align 4
+ %and = and i16 %load, -256
+ %cmp1 = icmp eq i16 %and, 32512
+ %trunc = trunc i16 %load to i8
+ %cmp2 = icmp eq i8 %trunc, 69
+ %or = and i1 %cmp1, %cmp2
+ ret i1 %or
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: load i16
+; CHECK-NEXT: icmp eq i16 %load, 32581
+; CHECK-NEXT: ret i1
+}
diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll
new file mode 100644
index 00000000000..51f98bc00dc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/min-positive.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@g = external global i32
+
+define i1 @smin(i32 %other) {
+; CHECK-LABEL: @smin(
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt i32 [[OTHER:%.*]], 0
+; CHECK-NEXT: ret i1 [[TEST]]
+;
+ %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+ %cmp = icmp slt i32 %positive, %other
+ %sel = select i1 %cmp, i32 %positive, i32 %other
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
+
+; Range metadata doesn't work for vectors, so find another way to trigger isKnownPositive().
+
+define <2 x i1> @smin_vec(<2 x i32> %x, <2 x i32> %other) {
+; CHECK-LABEL: @smin_vec(
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[OTHER:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TEST]]
+;
+ %notneg = and <2 x i32> %x, <i32 7, i32 7>
+ %positive = or <2 x i32> %notneg, <i32 1, i32 1>
+ %cmp = icmp slt <2 x i32> %positive, %other
+ %sel = select <2 x i1> %cmp, <2 x i32> %positive, <2 x i32> %other
+ %test = icmp sgt <2 x i32> %sel, zeroinitializer
+ ret <2 x i1> %test
+}
+
+define i1 @smin_commute(i32 %other) {
+; CHECK-LABEL: @smin_commute(
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt i32 [[OTHER:%.*]], 0
+; CHECK-NEXT: ret i1 [[TEST]]
+;
+ %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+ %cmp = icmp slt i32 %other, %positive
+ %sel = select i1 %cmp, i32 %other, i32 %positive
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
+
+define <2 x i1> @smin_commute_vec(<2 x i32> %x, <2 x i32> %other) {
+; CHECK-LABEL: @smin_commute_vec(
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[OTHER:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TEST]]
+;
+ %notneg = and <2 x i32> %x, <i32 7, i32 7>
+ %positive = or <2 x i32> %notneg, <i32 1, i32 1>
+ %cmp = icmp slt <2 x i32> %other, %positive
+ %sel = select <2 x i1> %cmp, <2 x i32> %other, <2 x i32> %positive
+ %test = icmp sgt <2 x i32> %sel, zeroinitializer
+ ret <2 x i1> %test
+}
+
+define <2 x i1> @smin_commute_vec_undef_elts(<2 x i32> %x, <2 x i32> %other) {
+; CHECK-LABEL: @smin_commute_vec_undef_elts(
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[OTHER:%.*]], <i32 0, i32 undef>
+; CHECK-NEXT: ret <2 x i1> [[TEST]]
+;
+ %notneg = and <2 x i32> %x, <i32 7, i32 7>
+ %positive = or <2 x i32> %notneg, <i32 1, i32 1>
+ %cmp = icmp slt <2 x i32> %other, %positive
+ %sel = select <2 x i1> %cmp, <2 x i32> %other, <2 x i32> %positive
+ %test = icmp sgt <2 x i32> %sel, <i32 0, i32 undef>
+ ret <2 x i1> %test
+}
+; %positive might be zero
+
+define i1 @maybe_not_positive(i32 %other) {
+; CHECK-LABEL: @maybe_not_positive(
+; CHECK-NEXT: [[POSITIVE:%.*]] = load i32, i32* @g, align 4, !range !0
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[POSITIVE]], [[OTHER:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[POSITIVE]], i32 [[OTHER]]
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt i32 [[SEL]], 0
+; CHECK-NEXT: ret i1 [[TEST]]
+;
+ %positive = load i32, i32* @g, !range !{i32 0, i32 2048}
+ %cmp = icmp slt i32 %positive, %other
+ %sel = select i1 %cmp, i32 %positive, i32 %other
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
+
+define <2 x i1> @maybe_not_positive_vec(<2 x i32> %x, <2 x i32> %other) {
+; CHECK-LABEL: @maybe_not_positive_vec(
+; CHECK-NEXT: [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[NOTNEG]], [[OTHER:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NOTNEG]], <2 x i32> [[OTHER]]
+; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[SEL]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TEST]]
+;
+ %notneg = and <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp slt <2 x i32> %notneg, %other
+ %sel = select <2 x i1> %cmp, <2 x i32> %notneg, <2 x i32> %other
+ %test = icmp sgt <2 x i32> %sel, zeroinitializer
+ ret <2 x i1> %test
+}
+
diff --git a/llvm/test/Transforms/InstCombine/minimum.ll b/llvm/test/Transforms/InstCombine/minimum.ll
new file mode 100644
index 00000000000..32aae6417eb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/minimum.ll
@@ -0,0 +1,317 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.minimum.f32(float, float)
+declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+
+declare double @llvm.minimum.f64(double, double)
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+
+declare float @llvm.maximum.f32(float, float)
+
+define float @constant_fold_minimum_f32() {
+; CHECK-LABEL: @constant_fold_minimum_f32(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float 1.0, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_inv() {
+; CHECK-LABEL: @constant_fold_minimum_f32_inv(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float 2.0, float 1.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_nan0() {
+; CHECK-LABEL: @constant_fold_minimum_f32_nan0(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.minimum.f32(float 0x7FF8000000000000, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_nan1() {
+; CHECK-LABEL: @constant_fold_minimum_f32_nan1(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.minimum.f32(float 2.0, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_nan_nan() {
+; CHECK-LABEL: @constant_fold_minimum_f32_nan_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.minimum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_p0_p0() {
+; CHECK-LABEL: @constant_fold_minimum_f32_p0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float 0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_p0_n0() {
+; CHECK-LABEL: @constant_fold_minimum_f32_p0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float 0.0, float -0.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_n0_p0() {
+; CHECK-LABEL: @constant_fold_minimum_f32_n0_p0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float -0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_minimum_f32_n0_n0() {
+; CHECK-LABEL: @constant_fold_minimum_f32_n0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.minimum.f32(float -0.0, float -0.0)
+ ret float %x
+}
+
+define <4 x float> @constant_fold_minimum_v4f32() {
+; CHECK-LABEL: @constant_fold_minimum_v4f32(
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 5.000000e+00>
+;
+ %x = call <4 x float> @llvm.minimum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+define double @constant_fold_minimum_f64() {
+; CHECK-LABEL: @constant_fold_minimum_f64(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %x = call double @llvm.minimum.f64(double 1.0, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_minimum_f64_nan0() {
+; CHECK-LABEL: @constant_fold_minimum_f64_nan0(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.minimum.f64(double 0x7FF8000000000000, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_minimum_f64_nan1() {
+; CHECK-LABEL: @constant_fold_minimum_f64_nan1(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.minimum.f64(double 2.0, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define double @constant_fold_minimum_f64_nan_nan() {
+; CHECK-LABEL: @constant_fold_minimum_f64_nan_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.minimum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define float @canonicalize_constant_minimum_f32(float %x) {
+; CHECK-LABEL: @canonicalize_constant_minimum_f32(
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[Y]]
+;
+ %y = call float @llvm.minimum.f32(float 1.0, float %x)
+ ret float %y
+}
+
+define float @minimum_f32_nan_val(float %x) {
+; CHECK-LABEL: @minimum_f32_nan_val(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %y = call float @llvm.minimum.f32(float 0x7FF8000000000000, float %x)
+ ret float %y
+}
+
+define float @minimum_f32_val_nan(float %x) {
+; CHECK-LABEL: @minimum_f32_val_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %y = call float @llvm.minimum.f32(float %x, float 0x7FF8000000000000)
+ ret float %y
+}
+
+define float @minimum_f32_1_minimum_val_p0(float %x) {
+; CHECK-LABEL: @minimum_f32_1_minimum_val_p0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minimum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minimum.f32(float %x, float 0.0)
+ %z = call float @llvm.minimum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minimum_f32_1_minimum_p0_val_fast(float %x) {
+; CHECK-LABEL: @minimum_f32_1_minimum_p0_val_fast(
+; CHECK-NEXT: [[RES:%.*]] = call fast float @llvm.minimum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minimum.f32(float 0.0, float %x)
+ %z = call fast float @llvm.minimum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minimum_f32_1_minimum_p0_val_nnan_ninf(float %x) {
+; CHECK-LABEL: @minimum_f32_1_minimum_p0_val_nnan_ninf(
+; CHECK-NEXT: [[RES:%.*]] = call nnan ninf float @llvm.minimum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minimum.f32(float 0.0, float %x)
+ %z = call nnan ninf float @llvm.minimum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minimum_f32_p0_minimum_val_n0(float %x) {
+; CHECK-LABEL: @minimum_f32_p0_minimum_val_n0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minimum.f32(float %x, float -0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minimum.f32(float %x, float -0.0)
+ %z = call float @llvm.minimum.f32(float %y, float 0.0)
+ ret float %z
+}
+
+define float @minimum_f32_1_minimum_p0_val(float %x) {
+; CHECK-LABEL: @minimum_f32_1_minimum_p0_val(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minimum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minimum.f32(float 0.0, float %x)
+ %z = call float @llvm.minimum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define <2 x float> @minimum_f32_1_minimum_val_p0_val_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @minimum_f32_1_minimum_val_p0_val_v2f32(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+; CHECK-NEXT: ret <2 x float> [[RES]]
+ %y = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+ %z = call <2 x float> @llvm.minimum.v2f32(<2 x float> %y, <2 x float><float 1.0, float 1.0>)
+ ret <2 x float> %z
+}
+
+define float @minimum4(float %x, float %y, float %z, float %w) {
+; CHECK-LABEL: @minimum4(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[Z:%.*]], float [[W:%.*]])
+; CHECK-NEXT: [[C:%.*]] = call float @llvm.minimum.f32(float [[A]], float [[B]])
+; CHECK-NEXT: ret float [[C]]
+;
+ %a = call float @llvm.minimum.f32(float %x, float %y)
+ %b = call float @llvm.minimum.f32(float %z, float %w)
+ %c = call float @llvm.minimum.f32(float %a, float %b)
+ ret float %c
+}
+
+define float @minimum_x_maximum_x_y(float %x, float %y) {
+; CHECK-LABEL: @minimum_x_maximum_x_y(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.maximum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.minimum.f32(float [[X]], float [[A]])
+; CHECK-NEXT: ret float [[B]]
+;
+ %a = call float @llvm.maximum.f32(float %x, float %y)
+ %b = call float @llvm.minimum.f32(float %x, float %a)
+ ret float %b
+}
+
+define float @maximum_x_minimum_x_y(float %x, float %y) {
+; CHECK-LABEL: @maximum_x_minimum_x_y(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.maximum.f32(float [[X]], float [[A]])
+; CHECK-NEXT: ret float [[B]]
+;
+ %a = call float @llvm.minimum.f32(float %x, float %y)
+ %b = call float @llvm.maximum.f32(float %x, float %a)
+ ret float %b
+}
+
+; PR37405 - https://bugs.llvm.org/show_bug.cgi?id=37405
+
+define double @neg_neg(double %x, double %y) {
+; CHECK-LABEL: @neg_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X:%.*]], double [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minimum.f64(double %negx, double %negy)
+ ret double %r
+}
+
+; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
+; Also, make sure this works with vectors.
+
+define <2 x double> @neg_neg_vec_fmf(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @neg_neg_vec_fmf(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %negx = fsub reassoc <2 x double> <double -0.0, double -0.0>, %x
+ %negy = fsub fast <2 x double> <double -0.0, double -0.0>, %y
+ %r = call nnan ninf <2 x double> @llvm.minimum.v2f64(<2 x double> %negx, <2 x double> %negy)
+ ret <2 x double> %r
+}
+
+; 1 extra use of an intermediate value should still allow the fold,
+; but 2 would require more instructions than we started with.
+
+declare void @use(double)
+define double @neg_neg_extra_use_x(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X]], double [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(double [[NEGX]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minimum.f64(double %negx, double %negy)
+ call void @use(double %negx)
+ ret double %r
+}
+
+define double @neg_neg_extra_use_y(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_y(
+; CHECK-NEXT: [[NEGY:%.*]] = fsub double -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X:%.*]], double [[Y]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(double [[NEGY]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minimum.f64(double %negx, double %negy)
+ call void @use(double %negy)
+ ret double %r
+}
+
+define double @neg_neg_extra_use_x_and_y(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[NEGY:%.*]] = fsub double -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call double @llvm.minimum.f64(double [[NEGX]], double [[NEGY]])
+; CHECK-NEXT: call void @use(double [[NEGX]])
+; CHECK-NEXT: call void @use(double [[NEGY]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minimum.f64(double %negx, double %negy)
+ call void @use(double %negx)
+ call void @use(double %negy)
+ ret double %r
+}
diff --git a/llvm/test/Transforms/InstCombine/minmax-demandbits.ll b/llvm/test/Transforms/InstCombine/minmax-demandbits.ll
new file mode 100644
index 00000000000..29a569663d2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/minmax-demandbits.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+
+define i32 @and_umax_less(i32 %A) {
+; CHECK-LABEL: @and_umax_less(
+; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %l0 = icmp ugt i32 31, %A
+ %l1 = select i1 %l0, i32 31, i32 %A
+ %x = and i32 %l1, -32
+ ret i32 %x
+}
+
+define i32 @and_umax_muchless(i32 %A) {
+; CHECK-LABEL: @and_umax_muchless(
+; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %l0 = icmp ugt i32 12, %A
+ %l1 = select i1 %l0, i32 12, i32 %A
+ %x = and i32 %l1, -32
+ ret i32 %x
+}
+
+define i32 @and_umax_more(i32 %A) {
+; CHECK-LABEL: @and_umax_more(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 32
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 32
+; CHECK-NEXT: [[X:%.*]] = and i32 [[L1]], -32
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %l0 = icmp ugt i32 32, %A
+ %l1 = select i1 %l0, i32 32, i32 %A
+ %x = and i32 %l1, -32
+ ret i32 %x
+}
+
+define i32 @shr_umax(i32 %A) {
+; CHECK-LABEL: @shr_umax(
+; CHECK-NEXT: [[X:%.*]] = lshr i32 [[A:%.*]], 4
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %l0 = icmp ugt i32 15, %A
+ %l1 = select i1 %l0, i32 15, i32 %A
+ %x = lshr i32 %l1, 4
+ ret i32 %x
+}
+
+; Various constants for C2 & umax(A, C1)
+
+define i8 @t_0_1(i8 %A) {
+; CHECK-LABEL: @t_0_1(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[A:%.*]], 1
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 0
+ %l1 = select i1 %l2, i8 %A, i8 0
+ %x = and i8 %l1, 1
+ ret i8 %x
+}
+
+define i8 @t_0_10(i8 %A) {
+; CHECK-LABEL: @t_0_10(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[A:%.*]], 10
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 0
+ %l1 = select i1 %l2, i8 %A, i8 0
+ %x = and i8 %l1, 10
+ ret i8 %x
+}
+
+define i8 @t_1_10(i8 %A) {
+; CHECK-LABEL: @t_1_10(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[A:%.*]], 10
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 1
+ %l1 = select i1 %l2, i8 %A, i8 1
+ %x = and i8 %l1, 10
+ ret i8 %x
+}
+
+define i8 @t_2_4(i8 %A) {
+; CHECK-LABEL: @t_2_4(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[A:%.*]], 4
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 2
+ %l1 = select i1 %l2, i8 %A, i8 2
+ %x = and i8 %l1, 4
+ ret i8 %x
+}
+
+define i8 @t_2_192(i8 %A) {
+; CHECK-LABEL: @t_2_192(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[A:%.*]], -64
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 2
+ %l1 = select i1 %l2, i8 %A, i8 2
+ %x = and i8 %l1, -64
+ ret i8 %x
+}
+
+define i8 @t_2_63_or(i8 %A) {
+; CHECK-LABEL: @t_2_63_or(
+; CHECK-NEXT: [[X:%.*]] = or i8 [[A:%.*]], 63
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 2
+ %l1 = select i1 %l2, i8 %A, i8 2
+ %x = or i8 %l1, 63
+ ret i8 %x
+}
+
+define i8 @f_1_1(i8 %A) {
+; CHECK-LABEL: @f_1_1(
+; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 1
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 1
+; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], 1
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 1
+ %l1 = select i1 %l2, i8 %A, i8 1
+ %x = and i8 %l1, 1
+ ret i8 %x
+}
+
+define i8 @f_32_32(i8 %A) {
+; CHECK-LABEL: @f_32_32(
+; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 32
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 32
+; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], -32
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 32
+ %l1 = select i1 %l2, i8 %A, i8 32
+ %x = and i8 %l1, -32
+ ret i8 %x
+}
+
+define i8 @f_191_192(i8 %A) {
+; CHECK-LABEL: @f_191_192(
+; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], -65
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 -65
+; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], -64
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 191
+ %l1 = select i1 %l2, i8 %A, i8 191
+ %x = and i8 %l1, 192
+ ret i8 %x
+}
+
+define i8 @f_10_1(i8 %A) {
+; CHECK-LABEL: @f_10_1(
+; CHECK-NEXT: [[L2:%.*]] = icmp ugt i8 [[A:%.*]], 10
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L2]], i8 [[A]], i8 10
+; CHECK-NEXT: [[X:%.*]] = and i8 [[L1]], 1
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %l2 = icmp ugt i8 %A, 10
+ %l1 = select i1 %l2, i8 %A, i8 10
+ %x = and i8 %l1, 1
+ ret i8 %x
+}
+
+define i32 @and_umin(i32 %A) {
+; CHECK-LABEL: @and_umin(
+; CHECK-NEXT: ret i32 0
+;
+ %l0 = icmp ult i32 15, %A
+ %l1 = select i1 %l0, i32 15, i32 %A
+ %x = and i32 %l1, -32
+ ret i32 %x
+}
+
+define i32 @or_umin(i32 %A) {
+; CHECK-LABEL: @or_umin(
+; CHECK-NEXT: ret i32 31
+;
+ %l0 = icmp ult i32 15, %A
+ %l1 = select i1 %l0, i32 15, i32 %A
+ %x = or i32 %l1, 31
+ ret i32 %x
+}
+
+define i8 @or_min_31_30(i8 %A) {
+; CHECK-LABEL: @or_min_31_30(
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A:%.*]], 31
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp ult i8 %A, -30
+ %min = select i1 %cmp, i8 %A, i8 -30
+ %r = or i8 %min, 31
+ ret i8 %r
+}
+
+define i8 @and_min_7_7(i8 %A) {
+; CHECK-LABEL: @and_min_7_7(
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A:%.*]], -8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %l2 = icmp ult i8 %A, -7
+ %min = select i1 %l2, i8 %A, i8 -7
+ %r = and i8 %min, -8
+ ret i8 %r
+}
+
+define i8 @and_min_7_8(i8 %A) {
+; CHECK-LABEL: @and_min_7_8(
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A:%.*]], -8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %l2 = icmp ult i8 %A, -8
+ %min = select i1 %l2, i8 %A, i8 -8
+ %r = and i8 %min, -8
+ ret i8 %r
+}
+
+define i8 @and_min_7_9(i8 %A) {
+; CHECK-LABEL: @and_min_7_9(
+; CHECK-NEXT: [[L2:%.*]] = icmp ult i8 [[A:%.*]], -9
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[L2]], i8 [[A]], i8 -9
+; CHECK-NEXT: [[R:%.*]] = and i8 [[MIN]], -8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %l2 = icmp ult i8 %A, -9
+ %min = select i1 %l2, i8 %A, i8 -9
+ %r = and i8 %min, -8
+ ret i8 %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
new file mode 100644
index 00000000000..264e579db1d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -0,0 +1,1450 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; This is the canonical form for a type-changing min/max.
+define i64 @t1(i32 %a) {
+; CHECK-LABEL: @t1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5
+; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %1 = icmp slt i32 %a, 5
+ %2 = select i1 %1, i32 %a, i32 5
+ %3 = sext i32 %2 to i64
+ ret i64 %3
+}
+
+; Check this is converted into canonical form, as above.
+define i64 @t2(i32 %a) {
+; CHECK-LABEL: @t2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp slt i32 %a, 5
+ %2 = sext i32 %a to i64
+ %3 = select i1 %1, i64 %2, i64 5
+ ret i64 %3
+}
+
+; Same as @t2, with flipped operands and zext instead of sext.
+define i64 @t3(i32 %a) {
+; CHECK-LABEL: @t3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp ult i32 %a, 5
+ %2 = zext i32 %a to i64
+ %3 = select i1 %1, i64 5, i64 %2
+ ret i64 %3
+}
+
+; Same again, with trunc.
+define i32 @t4(i64 %a) {
+; CHECK-LABEL: @t4(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A:%.*]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 5
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = icmp slt i64 %a, 5
+ %2 = trunc i64 %a to i32
+ %3 = select i1 %1, i32 %2, i32 5
+ ret i32 %3
+}
+
+; Same as @t3, but with mismatched signedness between icmp and zext.
+define i64 @t5(i32 %a) {
+; CHECK-LABEL: @t5(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 5
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp slt i32 %a, 5
+ %2 = zext i32 %a to i64
+ %3 = select i1 %1, i64 5, i64 %2
+ ret i64 %3
+}
+
+define float @t6(i32 %a) {
+; CHECK-LABEL: @t6(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = icmp slt i32 %a, 0
+ %2 = select i1 %1, i32 %a, i32 0
+ %3 = sitofp i32 %2 to float
+ ret float %3
+}
+
+define i16 @t7(i32 %a) {
+; CHECK-LABEL: @t7(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], -32768
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32768
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[TMP3]]
+;
+ %1 = icmp slt i32 %a, -32768
+ %2 = trunc i32 %a to i16
+ %3 = select i1 %1, i16 %2, i16 -32768
+ ret i16 %3
+}
+
+; Just check for no infinite loop. InstSimplify liked to
+; "simplify" -32767 by removing all the sign bits,
+; which led to a canonicalization fight between different
+; parts of instcombine.
+define i32 @t8(i64 %a, i32 %b) {
+; CHECK-LABEL: @t8(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[A:%.*]], -32767
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 -32767
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[B:%.*]], 42
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 42, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], [[B]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32
+; CHECK-NEXT: ret i32 [[TMP7]]
+;
+ %1 = icmp slt i64 %a, -32767
+ %2 = select i1 %1, i64 %a, i64 -32767
+ %3 = trunc i64 %2 to i32
+ %4 = icmp slt i32 %b, 42
+ %5 = select i1 %4, i32 42, i32 %3
+ %6 = icmp ne i32 %5, %b
+ %7 = zext i1 %6 to i32
+ ret i32 %7
+}
+
+; Ensure this doesn't get converted to a min/max.
+define i64 @t9(i32 %a) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 [[TMP2]], i64 4294967295
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %1 = icmp sgt i32 %a, -1
+ %2 = sext i32 %a to i64
+ %3 = select i1 %1, i64 %2, i64 4294967295
+ ret i64 %3
+}
+
+define float @t10(i32 %x) {
+; CHECK-LABEL: @t10(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[R1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %f_x = sitofp i32 %x to float
+ %cmp = icmp sgt i32 %x, 255
+ %r = select i1 %cmp, float %f_x, float 255.0
+ ret float %r
+}
+
+define float @t11(i64 %x) {
+; CHECK-LABEL: @t11(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], 255
+; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 255
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i64 [[R1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %f_x = sitofp i64 %x to float
+ %cmp = icmp sgt i64 %x, 255
+ %r = select i1 %cmp, float %f_x, float 255.0
+ ret float %r
+}
+
+; Reuse the first 2 bitcasts as the select operands.
+
+define <4 x i32> @bitcasts_fcmp_1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcasts_fcmp_1(
+; CHECK-NEXT: [[T0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x float>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT: [[T2:%.*]] = fcmp olt <4 x float> [[T1]], [[T0]]
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[T2]], <4 x float> [[T0]], <4 x float> [[T1]]
+; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[T5]]
+;
+ %t0 = bitcast <2 x i64> %a to <4 x float>
+ %t1 = bitcast <2 x i64> %b to <4 x float>
+ %t2 = fcmp olt <4 x float> %t1, %t0
+ %t3 = bitcast <2 x i64> %a to <4 x i32>
+ %t4 = bitcast <2 x i64> %b to <4 x i32>
+ %t5 = select <4 x i1> %t2, <4 x i32> %t3, <4 x i32> %t4
+ ret <4 x i32> %t5
+}
+
+; Switch cmp operand order.
+
+define <4 x i32> @bitcasts_fcmp_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcasts_fcmp_2(
+; CHECK-NEXT: [[T0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x float>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT: [[T2:%.*]] = fcmp olt <4 x float> [[T0]], [[T1]]
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[T2]], <4 x float> [[T0]], <4 x float> [[T1]]
+; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[T5]]
+;
+ %t0 = bitcast <2 x i64> %a to <4 x float>
+ %t1 = bitcast <2 x i64> %b to <4 x float>
+ %t2 = fcmp olt <4 x float> %t0, %t1
+ %t3 = bitcast <2 x i64> %a to <4 x i32>
+ %t4 = bitcast <2 x i64> %b to <4 x i32>
+ %t5 = select <4 x i1> %t2, <4 x i32> %t3, <4 x i32> %t4
+ ret <4 x i32> %t5
+}
+
+; Integer cmp should have the same transforms.
+
+define <4 x float> @bitcasts_icmp(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcasts_icmp(
+; CHECK-NEXT: [[T0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT: [[T2:%.*]] = icmp slt <4 x i32> [[T1]], [[T0]]
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[T2]], <4 x i32> [[T0]], <4 x i32> [[T1]]
+; CHECK-NEXT: [[T5:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
+; CHECK-NEXT: ret <4 x float> [[T5]]
+;
+ %t0 = bitcast <2 x i64> %a to <4 x i32>
+ %t1 = bitcast <2 x i64> %b to <4 x i32>
+ %t2 = icmp slt <4 x i32> %t1, %t0
+ %t3 = bitcast <2 x i64> %a to <4 x float>
+ %t4 = bitcast <2 x i64> %b to <4 x float>
+ %t5 = select <4 x i1> %t2, <4 x float> %t3, <4 x float> %t4
+ ret <4 x float> %t5
+}
+
+; SMIN(SMIN(X, 11), 92) -> SMIN(X, 11)
+define i32 @test68(i32 %x) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 11
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 11
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 11, %x
+ %cond = select i1 %cmp, i32 11, i32 %x
+ %cmp3 = icmp slt i32 92, %cond
+ %retval = select i1 %cmp3, i32 92, i32 %cond
+ ret i32 %retval
+}
+
+define <2 x i32> @test68vec(<2 x i32> %x) {
+; CHECK-LABEL: @test68vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 11, i32 11>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 11, i32 11>
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp slt <2 x i32> <i32 11, i32 11>, %x
+ %cond = select <2 x i1> %cmp, <2 x i32> <i32 11, i32 11>, <2 x i32> %x
+ %cmp3 = icmp slt <2 x i32> <i32 92, i32 92>, %cond
+ %retval = select <2 x i1> %cmp3, <2 x i32> <i32 92, i32 92>, <2 x i32> %cond
+ ret <2 x i32> %retval
+}
+
+; MIN(MIN(X, 24), 83) -> MIN(X, 24)
+define i32 @test69(i32 %x) {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 24
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 24
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 24, %x
+ %cond = select i1 %cmp, i32 24, i32 %x
+ %cmp3 = icmp ult i32 83, %cond
+ %retval = select i1 %cmp3, i32 83, i32 %cond
+ ret i32 %retval
+}
+
+; SMAX(SMAX(X, 75), 36) -> SMAX(X, 75)
+define i32 @test70(i32 %x) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 75
+ %cond = select i1 %cmp, i32 75, i32 %x
+ %cmp3 = icmp slt i32 %cond, 36
+ %retval = select i1 %cmp3, i32 36, i32 %cond
+ ret i32 %retval
+}
+
+; MAX(MAX(X, 68), 47) -> MAX(X, 68)
+define i32 @test71(i32 %x) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 68
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 68
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ult i32 %x, 68
+ %cond = select i1 %cmp, i32 68, i32 %x
+ %cmp3 = icmp ult i32 %cond, 47
+ %retval = select i1 %cmp3, i32 47, i32 %cond
+ ret i32 %retval
+}
+
+; SMIN(SMIN(X, 92), 11) -> SMIN(X, 11)
+define i32 @test72(i32 %x) {
+; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 11
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 11
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp sgt i32 %x, 92
+ %cond = select i1 %cmp, i32 92, i32 %x
+ %cmp3 = icmp sgt i32 %cond, 11
+ %retval = select i1 %cmp3, i32 11, i32 %cond
+ ret i32 %retval
+}
+
+define <2 x i32> @test72vec(<2 x i32> %x) {
+; CHECK-LABEL: @test72vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 11, i32 11>
+; CHECK-NEXT: [[RETVAL:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 11, i32 11>
+; CHECK-NEXT: ret <2 x i32> [[RETVAL]]
+;
+ %cmp = icmp sgt <2 x i32> %x, <i32 92, i32 92>
+ %cond = select <2 x i1> %cmp, <2 x i32> <i32 92, i32 92>, <2 x i32> %x
+ %cmp3 = icmp sgt <2 x i32> %cond, <i32 11, i32 11>
+ %retval = select <2 x i1> %cmp3, <2 x i32> <i32 11, i32 11>, <2 x i32> %cond
+ ret <2 x i32> %retval
+}
+
+; MIN(MIN(X, 83), 24) -> MIN(X, 24)
+define i32 @test73(i32 %x) {
+; CHECK-LABEL: @test73(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 24
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 24
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp ugt i32 %x, 83
+ %cond = select i1 %cmp, i32 83, i32 %x
+ %cmp3 = icmp ugt i32 %cond, 24
+ %retval = select i1 %cmp3, i32 24, i32 %cond
+ ret i32 %retval
+}
+
+; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
+define i32 @test74(i32 %x) {
+; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 75
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 75
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp slt i32 %x, 36
+ %cond = select i1 %cmp, i32 36, i32 %x
+ %cmp3 = icmp slt i32 %cond, 75
+ %retval = select i1 %cmp3, i32 75, i32 %cond
+ ret i32 %retval
+}
+
+; MAX(MAX(X, 47), 68) -> MAX(X, 68)
+define i32 @test75(i32 %x) {
+; CHECK-LABEL: @test75(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 68
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 68
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp ult i32 %x, 47
+ %cond = select i1 %cmp, i32 47, i32 %x
+ %cmp3 = icmp ult i32 %cond, 68
+ %retval = select i1 %cmp3, i32 68, i32 %cond
+ ret i32 %retval
+}
+
+; The next 10 tests are value clamping with constants:
+; https://llvm.org/bugs/show_bug.cgi?id=31693
+
+; (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
+
+define i32 @clamp_signed1(i32 %x) {
+; CHECK-LABEL: @clamp_signed1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp slt i32 %x, 255
+ %min = select i1 %cmp2, i32 %x, i32 255
+ %cmp1 = icmp slt i32 %x, 15
+ %r = select i1 %cmp1, i32 15, i32 %min
+ ret i32 %r
+}
+
+; (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
+
+define i32 @clamp_signed2(i32 %x) {
+; CHECK-LABEL: @clamp_signed2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], 15
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp sgt i32 %x, 15
+ %max = select i1 %cmp2, i32 %x, i32 15
+ %cmp1 = icmp sgt i32 %x, 255
+ %r = select i1 %cmp1, i32 255, i32 %max
+ ret i32 %r
+}
+
+; (X >s C1) ? SMIN(X, C2) : C1 ==> SMAX(SMIN(X, C2), C1)
+
+define i32 @clamp_signed3(i32 %x) {
+; CHECK-LABEL: @clamp_signed3(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[MIN]], 15
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp slt i32 %x, 255
+ %min = select i1 %cmp2, i32 %x, i32 255
+ %cmp1 = icmp sgt i32 %x, 15
+ %r = select i1 %cmp1, i32 %min, i32 15
+ ret i32 %r
+}
+
+; (X <s C1) ? SMAX(X, C2) : C1 ==> SMIN(SMAX(X, C2), C1)
+
+define i32 @clamp_signed4(i32 %x) {
+; CHECK-LABEL: @clamp_signed4(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X:%.*]], 15
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MAX]], 255
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp sgt i32 %x, 15
+ %max = select i1 %cmp2, i32 %x, i32 15
+ %cmp1 = icmp slt i32 %x, 255
+ %r = select i1 %cmp1, i32 %max, i32 255
+ ret i32 %r
+}
+
+; (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
+
+define i32 @clamp_unsigned1(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], 255
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp ult i32 %x, 255
+ %min = select i1 %cmp2, i32 %x, i32 255
+ %cmp1 = icmp ult i32 %x, 15
+ %r = select i1 %cmp1, i32 15, i32 %min
+ ret i32 %r
+}
+
+; (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
+
+define i32 @clamp_unsigned2(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], 15
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp ugt i32 %x, 15
+ %max = select i1 %cmp2, i32 %x, i32 15
+ %cmp1 = icmp ugt i32 %x, 255
+ %r = select i1 %cmp1, i32 255, i32 %max
+ ret i32 %r
+}
+
+; (X >u C1) ? UMIN(X, C2) : C1 ==> UMAX(UMIN(X, C2), C1)
+
+define i32 @clamp_unsigned3(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned3(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], 255
+; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MIN]], 15
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MIN]], i32 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp ult i32 %x, 255
+ %min = select i1 %cmp2, i32 %x, i32 255
+ %cmp1 = icmp ugt i32 %x, 15
+ %r = select i1 %cmp1, i32 %min, i32 15
+ ret i32 %r
+}
+
+; (X <u C1) ? UMAX(X, C2) : C1 ==> UMIN(UMAX(X, C2), C1)
+
+define i32 @clamp_unsigned4(i32 %x) {
+; CHECK-LABEL: @clamp_unsigned4(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], 15
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], i32 [[X]], i32 15
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[MAX]], 255
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[MAX]], i32 255
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp2 = icmp ugt i32 %x, 15
+ %max = select i1 %cmp2, i32 %x, i32 15
+ %cmp1 = icmp ult i32 %x, 255
+ %r = select i1 %cmp1, i32 %max, i32 255
+ ret i32 %r
+}
+
+; Check that the clamp is recognized and there is no infinite
+; loop caused by the reverse cmp transformation:
+; (icmp sgt smin(PositiveA, B) 0) -> (icmp sgt B 0)
+define i32 @clamp_check_for_no_infinite_loop1(i32 %i) {
+; CHECK-LABEL: @clamp_check_for_no_infinite_loop1(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I:%.*]], 255
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SEL1]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp1 = icmp slt i32 %i, 255
+ %sel1 = select i1 %cmp1, i32 %i, i32 255
+ %cmp2 = icmp slt i32 %i, 0
+ %res = select i1 %cmp2, i32 0, i32 %sel1
+ ret i32 %res
+}
+; Check that there is no infinite loop in the case of:
+; (icmp slt smax(NegativeA, B) 0) -> (icmp slt B 0)
+define i32 @clamp_check_for_no_infinite_loop2(i32 %i) {
+; CHECK-LABEL: @clamp_check_for_no_infinite_loop2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[I:%.*]], -255
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[I]], i32 -255
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[SEL1]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[SEL1]], i32 0
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp1 = icmp sgt i32 %i, -255
+ %sel1 = select i1 %cmp1, i32 %i, i32 -255
+ %cmp2 = icmp slt i32 %i, 0
+ %res = select i1 %cmp2, i32 %sel1, i32 0
+ ret i32 %res
+}
+
+; Check that there is no infinite loop because of reverse cmp transformation:
+; (icmp slt smax(PositiveA, B) 2) -> (icmp eq B 1)
+define i32 @clamp_check_for_no_infinite_loop3(i32 %i) {
+; CHECK-LABEL: @clamp_check_for_no_infinite_loop3(
+; CHECK-NEXT: [[I2:%.*]] = icmp sgt i32 [[I:%.*]], 1
+; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i32 [[I]], i32 1
+; CHECK-NEXT: br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]]
+; CHECK: truelabel:
+; CHECK-NEXT: [[I5:%.*]] = icmp slt i32 [[I3]], 2
+; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], i32 [[I3]], i32 2
+; CHECK-NEXT: [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2
+; CHECK-NEXT: ret i32 [[I7]]
+; CHECK: falselabel:
+; CHECK-NEXT: ret i32 0
+;
+
+ %i2 = icmp sgt i32 %i, 1
+ %i3 = select i1 %i2, i32 %i, i32 1
+ %i4 = icmp sgt i32 %i3, 0
+ br i1 %i4, label %truelabel, label %falselabel
+
+truelabel: ; %i<=1, %i3>0
+ %i5 = icmp slt i32 %i3, 2
+ %i6 = select i1 %i5, i32 %i3, i32 2
+ %i7 = shl nuw nsw i32 %i6, 2
+ ret i32 %i7
+
+falselabel:
+ ret i32 0
+}
+
+; The next 3 min tests should canonicalize to the same form...and not cause an infinite loop.
+
+define double @PR31751_umin1(i32 %x) {
+; CHECK-LABEL: @PR31751_umin1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sel = select i1 %cmp, i32 2147483647, i32 %x
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+define double @PR31751_umin2(i32 %x) {
+; CHECK-LABEL: @PR31751_umin2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X]], i32 2147483647
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp ult i32 %x, 2147483647
+ %sel = select i1 %cmp, i32 %x, i32 2147483647
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+define double @PR31751_umin3(i32 %x) {
+; CHECK-LABEL: @PR31751_umin3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483647
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp ugt i32 %x, 2147483647
+ %sel = select i1 %cmp, i32 2147483647, i32 %x
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+; The next 3 max tests should canonicalize to the same form...and not cause an infinite loop.
+
+define double @PR31751_umax1(i32 %x) {
+; CHECK-LABEL: @PR31751_umax1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sel = select i1 %cmp, i32 2147483648, i32 %x
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+define double @PR31751_umax2(i32 %x) {
+; CHECK-LABEL: @PR31751_umax2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -2147483648
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp ugt i32 %x, 2147483648
+ %sel = select i1 %cmp, i32 %x, i32 2147483648
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+define double @PR31751_umax3(i32 %x) {
+; CHECK-LABEL: @PR31751_umax3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -2147483648
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
+; CHECK-NEXT: ret double [[CONV]]
+;
+ %cmp = icmp ult i32 %x, 2147483648
+ %sel = select i1 %cmp, i32 2147483648, i32 %x
+ %conv = sitofp i32 %sel to double
+ ret double %conv
+}
+
+; The icmp/select form a canonical smax, so don't hide that by folding the final bitcast into the select.
+
+define float @bitcast_scalar_smax(float %x, float %y) {
+; CHECK-LABEL: @bitcast_scalar_smax(
+; CHECK-NEXT: [[BCX:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[BCY:%.*]] = bitcast float [[Y:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[BCX]], [[BCY]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[BCX]], i32 [[BCY]]
+; CHECK-NEXT: [[BCS:%.*]] = bitcast i32 [[SEL]] to float
+; CHECK-NEXT: ret float [[BCS]]
+;
+ %bcx = bitcast float %x to i32
+ %bcy = bitcast float %y to i32
+ %cmp = icmp sgt i32 %bcx, %bcy
+ %sel = select i1 %cmp, i32 %bcx, i32 %bcy
+ %bcs = bitcast i32 %sel to float
+ ret float %bcs
+}
+
+; FIXME: Create a canonical umax by bitcasting the select.
+
+define float @bitcast_scalar_umax(float %x, float %y) {
+; CHECK-LABEL: @bitcast_scalar_umax(
+; CHECK-NEXT: [[BCX:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[BCY:%.*]] = bitcast float [[Y:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[BCX]], [[BCY]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[Y]]
+; CHECK-NEXT: ret float [[SEL]]
+;
+ %bcx = bitcast float %x to i32
+ %bcy = bitcast float %y to i32
+ %cmp = icmp ugt i32 %bcx, %bcy
+ %sel = select i1 %cmp, float %x, float %y
+ ret float %sel
+}
+
+; PR32306 - https://bugs.llvm.org/show_bug.cgi?id=32306
+; The icmp/select form a canonical smin, so don't hide that by folding the final bitcast into the select.
+
+define <8 x float> @bitcast_vector_smin(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @bitcast_vector_smin(
+; CHECK-NEXT: [[BCX:%.*]] = bitcast <8 x float> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[BCY:%.*]] = bitcast <8 x float> [[Y:%.*]] to <8 x i32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[BCX]], [[BCY]]
+; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[BCX]], <8 x i32> [[BCY]]
+; CHECK-NEXT: [[BCS:%.*]] = bitcast <8 x i32> [[SEL]] to <8 x float>
+; CHECK-NEXT: ret <8 x float> [[BCS]]
+;
+ %bcx = bitcast <8 x float> %x to <8 x i32>
+ %bcy = bitcast <8 x float> %y to <8 x i32>
+ %cmp = icmp slt <8 x i32> %bcx, %bcy
+ %sel = select <8 x i1> %cmp, <8 x i32> %bcx, <8 x i32> %bcy
+ %bcs = bitcast <8 x i32> %sel to <8 x float>
+ ret <8 x float> %bcs
+}
+
+; FIXME: Create a canonical umin by bitcasting the select.
+
+define <8 x float> @bitcast_vector_umin(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @bitcast_vector_umin(
+; CHECK-NEXT: [[BCX:%.*]] = bitcast <8 x float> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[BCY:%.*]] = bitcast <8 x float> [[Y:%.*]] to <8 x i32>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[BCX]], [[BCY]]
+; CHECK-NEXT: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[X]], <8 x float> [[Y]]
+; CHECK-NEXT: ret <8 x float> [[SEL]]
+;
+ %bcx = bitcast <8 x float> %x to <8 x i32>
+ %bcy = bitcast <8 x float> %y to <8 x i32>
+ %cmp = icmp slt <8 x i32> %bcx, %bcy
+ %sel = select <8 x i1> %cmp, <8 x float> %x, <8 x float> %y
+ ret <8 x float> %sel
+}
+
+; Check that we look through the cast and recognize the min idiom.
+
+define zeroext i8 @look_through_cast1(i32 %x) {
+; CHECK-LABEL: @look_through_cast1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 511
+; CHECK-NEXT: [[RES1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 511
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[RES1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %cmp1 = icmp slt i32 %x, 511
+ %x_trunc = trunc i32 %x to i8
+ %res = select i1 %cmp1, i8 %x_trunc, i8 255
+ ret i8 %res
+}
+
+; Check that we look through the cast, but the min is not recognized.
+
+define zeroext i8 @look_through_cast2(i32 %x) {
+; CHECK-LABEL: @look_through_cast2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 510
+; CHECK-NEXT: [[X_TRUNC:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP1]], i8 [[X_TRUNC]], i8 -1
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %cmp1 = icmp slt i32 %x, 510
+ %x_trunc = trunc i32 %x to i8
+ %res = select i1 %cmp1, i8 %x_trunc, i8 255
+ ret i8 %res
+}
+
+define <2 x i8> @min_through_cast_vec1(<2 x i32> %x) {
+; CHECK-LABEL: @min_through_cast_vec1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 510, i32 511>
+; CHECK-NEXT: [[RES1:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 510, i32 511>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[TMP2]]
+;
+ %cmp = icmp slt <2 x i32> %x, <i32 510, i32 511>
+ %x_trunc = trunc <2 x i32> %x to <2 x i8>
+ %res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 254, i8 255>
+ ret <2 x i8> %res
+}
+
+define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) {
+; CHECK-LABEL: @min_through_cast_vec2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 511, i32 511>
+; CHECK-NEXT: [[RES1:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 511, i32 511>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[TMP2]]
+;
+ %cmp = icmp slt <2 x i32> %x, <i32 511, i32 511>
+ %x_trunc = trunc <2 x i32> %x to <2 x i8>
+ %res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 255, i8 255>
+ ret <2 x i8> %res
+}
+
+; Remove a min/max op in a sequence with a common operand.
+; PR35717: https://bugs.llvm.org/show_bug.cgi?id=35717
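+; The common operand only needs to participate once, so one of the inner min/max ops is redundant.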
+
+; min(min(a, b), min(b, c)) --> min(min(a, b), c)
+
+define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_smin(
+; CHECK-NEXT: [[CMP_AB:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[MIN_AB]], [[C:%.*]]
+; CHECK-NEXT: [[MIN_ABC:%.*]] = select i1 [[TMP1]], i32 [[MIN_AB]], i32 [[C]]
+; CHECK-NEXT: ret i32 [[MIN_ABC]]
+;
+ %cmp_ab = icmp slt i32 %a, %b
+ %min_ab = select i1 %cmp_ab, i32 %a, i32 %b
+ %cmp_bc = icmp slt i32 %b, %c
+ %min_bc = select i1 %cmp_bc, i32 %b, i32 %c
+ %cmp_ab_bc = icmp slt i32 %min_ab, %min_bc
+ %min_abc = select i1 %cmp_ab_bc, i32 %min_ab, i32 %min_bc
+ ret i32 %min_abc
+}
+
+; max(max(a, b), max(c, b)) --> max(max(a, b), c)
+
+define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @common_factor_smax(
+; CHECK-NEXT: [[CMP_AB:%.*]] = icmp sgt <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> [[A]], <2 x i32> [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[C:%.*]]
+; CHECK-NEXT: [[MAX_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MAX_AB]], <2 x i32> [[C]]
+; CHECK-NEXT: ret <2 x i32> [[MAX_ABC]]
+;
+ %cmp_ab = icmp sgt <2 x i32> %a, %b
+ %max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
+ %cmp_cb = icmp sgt <2 x i32> %c, %b
+ %max_cb = select <2 x i1> %cmp_cb, <2 x i32> %c, <2 x i32> %b
+ %cmp_ab_cb = icmp sgt <2 x i32> %max_ab, %max_cb
+ %max_abc = select <2 x i1> %cmp_ab_cb, <2 x i32> %max_ab, <2 x i32> %max_cb
+ ret <2 x i32> %max_abc
+}
+
+; min(min(b, c), min(a, b)) --> min(min(b, c), a)
+
+define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @common_factor_umin(
+; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ult <2 x i32> [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> [[B]], <2 x i32> [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[A:%.*]]
+; CHECK-NEXT: [[MIN_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MIN_BC]], <2 x i32> [[A]]
+; CHECK-NEXT: ret <2 x i32> [[MIN_ABC]]
+;
+ %cmp_bc = icmp ult <2 x i32> %b, %c
+ %min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c
+ %cmp_ab = icmp ult <2 x i32> %a, %b
+ %min_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
+ %cmp_bc_ab = icmp ult <2 x i32> %min_bc, %min_ab
+ %min_abc = select <2 x i1> %cmp_bc_ab, <2 x i32> %min_bc, <2 x i32> %min_ab
+ ret <2 x i32> %min_abc
+}
+
+; max(max(b, c), max(b, a)) --> max(max(b, c), a)
+
+define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax(
+; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], [[A:%.*]]
+; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[MAX_ABC]]
+;
+ %cmp_bc = icmp ugt i32 %b, %c
+ %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+ %cmp_ba = icmp ugt i32 %b, %a
+ %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+ %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+ %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+ ret i32 %max_abc
+}
+
+declare void @extra_use(i32)
+
+define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_lhs(
+; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], [[A:%.*]]
+; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 [[A]]
+; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]])
+; CHECK-NEXT: ret i32 [[MAX_ABC]]
+;
+ %cmp_bc = icmp ugt i32 %b, %c
+ %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+ %cmp_ba = icmp ugt i32 %b, %a
+ %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+ %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+ %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+ call void @extra_use(i32 %max_bc)
+ ret i32 %max_abc
+}
+
+define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_rhs(
+; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 [[B]], i32 [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[MAX_BA]], [[C:%.*]]
+; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BA]], i32 [[C]]
+; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]])
+; CHECK-NEXT: ret i32 [[MAX_ABC]]
+;
+ %cmp_bc = icmp ugt i32 %b, %c
+ %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+ %cmp_ba = icmp ugt i32 %b, %a
+ %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+ %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+ %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+ call void @extra_use(i32 %max_ba)
+ ret i32 %max_abc
+}
+
+define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @common_factor_umax_extra_use_both(
+; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 [[B]], i32 [[C]]
+; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 [[B]], [[A:%.*]]
+; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 [[B]], i32 [[A]]
+; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
+; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]])
+; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]])
+; CHECK-NEXT: ret i32 [[MAX_ABC]]
+;
+ %cmp_bc = icmp ugt i32 %b, %c
+ %max_bc = select i1 %cmp_bc, i32 %b, i32 %c
+ %cmp_ba = icmp ugt i32 %b, %a
+ %max_ba = select i1 %cmp_ba, i32 %b, i32 %a
+ %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
+ %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
+ call void @extra_use(i32 %max_bc)
+ call void @extra_use(i32 %max_ba)
+ ret i32 %max_abc
+}
+
+; This would assert. Don't assume that earlier min/max types match a possible later min/max.
+
+define float @not_min_of_min(i8 %i, float %x) {
+; CHECK-LABEL: @not_min_of_min(
+; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp fast oge float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[CMP1_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X]], 2.000000e+00
+; CHECK-NEXT: [[MIN2:%.*]] = select i1 [[CMP2_INV]], float 2.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i8 [[I:%.*]], 16
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], float [[MIN1]], float [[MIN2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %cmp1 = fcmp fast ult float %x, 1.0
+ %min1 = select i1 %cmp1, float %x, float 1.0
+ %cmp2 = fcmp fast ult float %x, 2.0
+ %min2 = select i1 %cmp2, float %x, float 2.0
+ %cmp3 = icmp ult i8 %i, 16
+ %r = select i1 %cmp3, float %min1, float %min2
+ ret float %r
+}
+
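+; With nuw on the add, umin(X + 15, 42) is the same as umin(X, 27) + 15, so the constant offset is pulled out of the min.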
+define i32 @add_umin(i32 %x) {
+; CHECK-LABEL: @add_umin(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 27
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[TMP2]], 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define i32 @add_umin_constant_limit(i32 %x) {
+; CHECK-LABEL: @add_umin_constant_limit(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 41, i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 41
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i32 @add_umin_simplify(i32 %x) {
+; CHECK-LABEL: @add_umin_simplify(
+; CHECK-NEXT: ret i32 42
+;
+ %a = add nuw i32 %x, 42
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i32 @add_umin_simplify2(i32 %x) {
+; CHECK-LABEL: @add_umin_simplify2(
+; CHECK-NEXT: ret i32 42
+;
+ %a = add nuw i32 %x, 43
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umin_wrong_pred(i32 %x) {
+; CHECK-LABEL: @add_umin_wrong_pred(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ %c = icmp slt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umin_wrong_wrap(i32 %x) {
+; CHECK-LABEL: @add_umin_wrong_wrap(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umin_extra_use(i32 %x, i32* %p) {
+; CHECK-LABEL: @add_umin_extra_use(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ store i32 %a, i32* %p
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define <2 x i16> @add_umin_vec(<2 x i16> %x) {
+; CHECK-LABEL: @add_umin_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i16> [[X:%.*]], <i16 225, i16 225>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> <i16 225, i16 225>
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i16> [[TMP2]], <i16 15, i16 15>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %a = add nuw <2 x i16> %x, <i16 15, i16 15>
+ %c = icmp ult <2 x i16> %a, <i16 240, i16 240>
+ %r = select <2 x i1> %c, <2 x i16> %a, <2 x i16> <i16 240, i16 240>
+ ret <2 x i16> %r
+}
+
+define i37 @add_umax(i37 %x) {
+; CHECK-LABEL: @add_umax(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 37
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37
+; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 5
+; CHECK-NEXT: ret i37 [[R]]
+;
+ %a = add nuw i37 %x, 5
+ %c = icmp ugt i37 %a, 42
+ %r = select i1 %c, i37 %a, i37 42
+ ret i37 %r
+}
+
+define i37 @add_umax_constant_limit(i37 %x) {
+; CHECK-LABEL: @add_umax_constant_limit(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i37 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 1
+; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[TMP2]], 81
+; CHECK-NEXT: ret i37 [[R]]
+;
+ %a = add nuw i37 %x, 81
+ %c = icmp ugt i37 %a, 82
+ %r = select i1 %c, i37 %a, i37 82
+ ret i37 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i37 @add_umax_simplify(i37 %x) {
+; CHECK-LABEL: @add_umax_simplify(
+; CHECK-NEXT: [[R:%.*]] = add nuw i37 [[X:%.*]], 42
+; CHECK-NEXT: ret i37 [[R]]
+;
+ %a = add nuw i37 %x, 42
+ %c = icmp ugt i37 %a, 42
+ %r = select i1 %c, i37 %a, i37 42
+ ret i37 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i32 @add_umax_simplify2(i32 %x) {
+; CHECK-LABEL: @add_umax_simplify2(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 57
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %a = add nuw i32 %x, 57
+ %c = icmp ugt i32 %a, 56
+ %r = select i1 %c, i32 %a, i32 56
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umax_wrong_pred(i32 %x) {
+; CHECK-LABEL: @add_umax_wrong_pred(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ %c = icmp sgt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umax_wrong_wrap(i32 %x) {
+; CHECK-LABEL: @add_umax_wrong_wrap(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ %c = icmp ugt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_umax_extra_use(i32 %x, i32* %p) {
+; CHECK-LABEL: @add_umax_extra_use(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ store i32 %a, i32* %p
+ %c = icmp ugt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define <2 x i33> @add_umax_vec(<2 x i33> %x) {
+; CHECK-LABEL: @add_umax_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i33> [[X:%.*]], <i33 235, i33 235>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> <i33 235, i33 235>
+; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i33> [[TMP2]], <i33 5, i33 5>
+; CHECK-NEXT: ret <2 x i33> [[R]]
+;
+ %a = add nuw <2 x i33> %x, <i33 5, i33 5>
+ %c = icmp ugt <2 x i33> %a, <i33 240, i33 240>
+ %r = select <2 x i1> %c, <2 x i33> %a, <2 x i33> <i33 240, i33 240>
+ ret <2 x i33> %r
+}
+
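+; zext to i32, add 15 (nuw), clamp to 255 with umin, and trunc back to i8 is a saturating
+; unsigned add, so this becomes llvm.uadd.sat.i8 (checked below).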
+define i8 @PR14613_umin(i8 %x) {
+; CHECK-LABEL: @PR14613_umin(
+; CHECK-NEXT: [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15)
+; CHECK-NEXT: ret i8 [[U7]]
+;
+ %u4 = zext i8 %x to i32
+ %u5 = add nuw nsw i32 %u4, 15
+ %u6 = icmp ult i32 %u5, 255
+ %u7 = select i1 %u6, i32 %u5, i32 255
+ %r = trunc i32 %u7 to i8
+ ret i8 %r
+}
+
+define i8 @PR14613_umax(i8 %x) {
+; CHECK-LABEL: @PR14613_umax(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -16
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16
+; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15
+; CHECK-NEXT: ret i8 [[U7]]
+;
+ %u4 = zext i8 %x to i32
+ %u5 = add nuw nsw i32 %u4, 15
+ %u6 = icmp ugt i32 %u5, 255
+ %u7 = select i1 %u6, i32 %u5, i32 255
+ %r = trunc i32 %u7 to i8
+ ret i8 %r
+}
+
+define i32 @add_smin(i32 %x) {
+; CHECK-LABEL: @add_smin(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 27
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 27
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP2]], 15
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ %c = icmp slt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define i32 @add_smin_constant_limit(i32 %x) {
+; CHECK-LABEL: @add_smin_constant_limit(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 2147483646
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2147483646
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[TMP2]], -3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, -3
+ %c = icmp slt i32 %a, 2147483643
+ %r = select i1 %c, i32 %a, i32 2147483643
+ ret i32 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i32 @add_smin_simplify(i32 %x) {
+; CHECK-LABEL: @add_smin_simplify(
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, -3
+ %c = icmp slt i32 %a, 2147483644
+ %r = select i1 %c, i32 %a, i32 2147483644
+ ret i32 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i32 @add_smin_simplify2(i32 %x) {
+; CHECK-LABEL: @add_smin_simplify2(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], -3
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %a = add nsw i32 %x, -3
+ %c = icmp slt i32 %a, 2147483645
+ %r = select i1 %c, i32 %a, i32 2147483645
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_smin_wrong_pred(i32 %x) {
+; CHECK-LABEL: @add_smin_wrong_pred(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ %c = icmp ult i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_smin_wrong_wrap(i32 %x) {
+; CHECK-LABEL: @add_smin_wrong_wrap(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ %c = icmp slt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_smin_extra_use(i32 %x, i32* %p) {
+; CHECK-LABEL: @add_smin_extra_use(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ store i32 %a, i32* %p
+ %c = icmp slt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define <2 x i16> @add_smin_vec(<2 x i16> %x) {
+; CHECK-LABEL: @add_smin_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> [[X:%.*]], <i16 225, i16 225>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[X]], <2 x i16> <i16 225, i16 225>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i16> [[TMP2]], <i16 15, i16 15>
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %a = add nsw <2 x i16> %x, <i16 15, i16 15>
+ %c = icmp slt <2 x i16> %a, <i16 240, i16 240>
+ %r = select <2 x i1> %c, <2 x i16> %a, <2 x i16> <i16 240, i16 240>
+ ret <2 x i16> %r
+}
+
+define i37 @add_smax(i37 %x) {
+; CHECK-LABEL: @add_smax(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i37 [[X:%.*]], 37
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i37 [[X]], i37 37
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i37 [[TMP2]], 5
+; CHECK-NEXT: ret i37 [[R]]
+;
+ %a = add nsw i37 %x, 5
+ %c = icmp sgt i37 %a, 42
+ %r = select i1 %c, i37 %a, i37 42
+ ret i37 %r
+}
+
+define i8 @add_smax_constant_limit(i8 %x) {
+; CHECK-LABEL: @add_smax_constant_limit(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], -127
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -127
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[TMP2]], 125
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = add nsw i8 %x, 125
+ %c = icmp sgt i8 %a, -2
+ %r = select i1 %c, i8 %a, i8 -2
+ ret i8 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i8 @add_smax_simplify(i8 %x) {
+; CHECK-LABEL: @add_smax_simplify(
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[X:%.*]], 126
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = add nsw i8 %x, 126
+ %c = icmp sgt i8 %a, -2
+ %r = select i1 %c, i8 %a, i8 -2
+ ret i8 %r
+}
+
+; Negative test
+; TODO: assert that instsimplify always gets this?
+
+define i8 @add_smax_simplify2(i8 %x) {
+; CHECK-LABEL: @add_smax_simplify2(
+; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 127
+; CHECK-NEXT: ret i8 [[A]]
+;
+ %a = add nsw i8 %x, 127
+ %c = icmp sgt i8 %a, -2
+ %r = select i1 %c, i8 %a, i8 -2
+ ret i8 %r
+}
+
+; Negative test
+
+define i32 @add_smax_wrong_pred(i32 %x) {
+; CHECK-LABEL: @add_smax_wrong_pred(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ %c = icmp ugt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_smax_wrong_wrap(i32 %x) {
+; CHECK-LABEL: @add_smax_wrong_wrap(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 15
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nuw i32 %x, 15
+ %c = icmp sgt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @add_smax_extra_use(i32 %x, i32* %p) {
+; CHECK-LABEL: @add_smax_extra_use(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15
+; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], 42
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add nsw i32 %x, 15
+ store i32 %a, i32* %p
+ %c = icmp sgt i32 %a, 42
+ %r = select i1 %c, i32 %a, i32 42
+ ret i32 %r
+}
+
+define <2 x i33> @add_smax_vec(<2 x i33> %x) {
+; CHECK-LABEL: @add_smax_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i33> [[X:%.*]], <i33 235, i33 235>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i33> [[X]], <2 x i33> <i33 235, i33 235>
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i33> [[TMP2]], <i33 5, i33 5>
+; CHECK-NEXT: ret <2 x i33> [[R]]
+;
+ %a = add nsw <2 x i33> %x, <i33 5, i33 5>
+ %c = icmp sgt <2 x i33> %a, <i33 240, i33 240>
+ %r = select <2 x i1> %c, <2 x i33> %a, <2 x i33> <i33 240, i33 240>
+ ret <2 x i33> %r
+}
+
+define i8 @PR14613_smin(i8 %x) {
+; CHECK-LABEL: @PR14613_smin(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 40
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 40
+; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15
+; CHECK-NEXT: ret i8 [[U7]]
+;
+ %u4 = sext i8 %x to i32
+ %u5 = add nuw nsw i32 %u4, 15
+ %u6 = icmp slt i32 %u5, 55
+ %u7 = select i1 %u6, i32 %u5, i32 55
+ %r = trunc i32 %u7 to i8
+ ret i8 %r
+}
+
+define i8 @PR14613_smax(i8 %x) {
+; CHECK-LABEL: @PR14613_smax(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 40
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 40
+; CHECK-NEXT: [[U7:%.*]] = add nuw i8 [[TMP2]], 15
+; CHECK-NEXT: ret i8 [[U7]]
+;
+ %u4 = sext i8 %x to i32
+ %u5 = add nuw nsw i32 %u4, 15
+ %u6 = icmp sgt i32 %u5, 55
+ %u7 = select i1 %u6, i32 %u5, i32 55
+ %r = trunc i32 %u7 to i8
+ ret i8 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll
new file mode 100644
index 00000000000..11418156a48
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll
@@ -0,0 +1,257 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; This is the canonical form for a type-changing min/max.
+define double @t1(float %a) {
+; CHECK-LABEL: @t1(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 5.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = fcmp ult float %a, 5.0
+ %2 = select i1 %1, float %a, float 5.0
+ %3 = fpext float %2 to double
+ ret double %3
+}
+
+; Check that this is converted into the canonical form, as above.
+define double @t2(float %a) {
+; CHECK-LABEL: @t2(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 5.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.0
+ ret double %3
+}
+
+; Same again, with trunc.
+define float @t4(double %a) {
+; CHECK-LABEL: @t4(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge double [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], double 5.000000e+00, double [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fptrunc double [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %1 = fcmp ult double %a, 5.0
+ %2 = fptrunc double %a to float
+ %3 = select i1 %1, float %2, float 5.0
+ ret float %3
+}
+
+; Different values; this should not be converted.
+define double @t5(float %a) {
+; CHECK-LABEL: @t5(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[A]] to double
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], double [[TMP2]], double 5.001000e+00
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.001
+ ret double %3
+}
+
+; From IEEE754: "Comparisons shall ignore the sign of zero (so +0 = -0)."
+; So the compare constant may be treated as +0.0, and we sink the fpext.
+
+define double @t6(float %a) {
+; CHECK-LABEL: @t6(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = fcmp ult float %a, -0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 0.0
+ ret double %3
+}
+
+; From IEEE754: "Comparisons shall ignore the sign of zero (so +0 = -0)."
+; So the compare constant may be treated as -0.0, and we sink the fpext.
+
+define double @t7(float %a) {
+; CHECK-LABEL: @t7(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float -0.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = fcmp ult float %a, 0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double -0.0
+ ret double %3
+}
+
+; min(min(x, 0.0), 0.0) --> min(x, 0.0)
+
+define float @fmin_fmin_zero_mismatch(float %x) {
+; CHECK-LABEL: @fmin_fmin_zero_mismatch(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[MIN2:%.*]] = select i1 [[TMP1]], float [[X]], float 0.000000e+00
+; CHECK-NEXT: ret float [[MIN2]]
+;
+ %cmp1 = fcmp olt float %x, -0.0
+ %min1 = select i1 %cmp1, float %x, float 0.0
+ %cmp2 = fcmp olt float %min1, 0.0
+ %min2 = select i1 %cmp2, float %min1, float 0.0
+ ret float %min2
+}
+
+; max(max(x, -0.0), -0.0) --> max(x, -0.0)
+
+define float @fmax_fmax_zero_mismatch(float %x) {
+; CHECK-LABEL: @fmax_fmax_zero_mismatch(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt float [[X:%.*]], -0.000000e+00
+; CHECK-NEXT: [[MAX11:%.*]] = select i1 [[TMP1]], float [[X]], float -0.000000e+00
+; CHECK-NEXT: ret float [[MAX11]]
+;
+ %cmp1 = fcmp ogt float %x, 0.0
+ %max1 = select i1 %cmp1, float %x, float -0.0
+ %cmp2 = fcmp ogt float 0.0, %max1
+ %max2 = select i1 %cmp2, float -0.0, float %max1
+ ret float %max2
+}
+
+define i64 @t8(float %a) {
+; CHECK-LABEL: @t8(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 5.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fptoui float [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = fcmp ult float %a, 5.0
+ %2 = fptoui float %a to i64
+ %3 = select i1 %1, i64 %2, i64 5
+ ret i64 %3
+}
+
+define i8 @t9(float %a) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = fcmp ult float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+; Either operand could be NaN, but the fast modifier is applied.
+define i8 @t11(float %a, float %b) {
+; CHECK-LABEL: @t11(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[DOTINV]], float [[A]], float [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[DOTV]] to i8
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %1 = fcmp fast ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; Either operand could be NaN, but the nnan modifier is applied.
+define i8 @t12(float %a, float %b) {
+; CHECK-LABEL: @t12(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp nnan oge float [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[DOTINV]], float [[A]], float [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[DOTV]] to i8
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %1 = fcmp nnan ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; Float and int values do not match.
+define i8 @t13(float %a) {
+; CHECK-LABEL: @t13(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult float [[A:%.*]], 1.500000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[A]] to i8
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 1
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+ %1 = fcmp ult float %a, 1.5
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 1
+ ret i8 %3
+}
+
+; %a could be -0.0, but it doesn't matter because the conversion to int is the same for 0.0 or -0.0.
+define i8 @t14(float %a) {
+; CHECK-LABEL: @t14(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = fcmp ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+define i8 @t14_commute(float %a) {
+; CHECK-LABEL: @t14_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i8
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+ %1 = fcmp ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 0, i8 %2
+ ret i8 %3
+}
+
+define i8 @t15(float %a) {
+; CHECK-LABEL: @t15(
+; CHECK-NEXT: [[DOTINV:%.*]] = fcmp nsz oge float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = fcmp nsz ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
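+; @t16 below is not changed because 5.0e-01 is not the sitofp of any i32, so this is not a min/max;
+; @t17 does fold because 2.0 is exactly sitofp(i32 2), giving smax(X, 2) followed by sitofp.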
+define double @t16(i32 %x) {
+; CHECK-LABEL: @t16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[CST:%.*]] = sitofp i32 [[X]] to double
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], double [[CST]], double 5.000000e-01
+; CHECK-NEXT: ret double [[SEL]]
+;
+ %cmp = icmp sgt i32 %x, 0
+ %cst = sitofp i32 %x to double
+ %sel = select i1 %cmp, double %cst, double 5.000000e-01
+ ret double %sel
+}
+
+define double @t17(i32 %x) {
+; CHECK-LABEL: @t17(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 2
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[SEL1]] to double
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %cmp = icmp sgt i32 %x, 2
+ %cst = sitofp i32 %x to double
+ %sel = select i1 %cmp, double %cst, double 2.0
+ ret double %sel
+}
+
diff --git a/llvm/test/Transforms/InstCombine/minnum.ll b/llvm/test/Transforms/InstCombine/minnum.ll
new file mode 100644
index 00000000000..73b4f0c9251
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/minnum.ll
@@ -0,0 +1,318 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.minnum.f32(float, float)
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+
+declare double @llvm.minnum.f64(double, double)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+
+declare float @llvm.maxnum.f32(float, float)
+
+define float @constant_fold_minnum_f32() {
+; CHECK-LABEL: @constant_fold_minnum_f32(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 1.0, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_inv() {
+; CHECK-LABEL: @constant_fold_minnum_f32_inv(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 2.0, float 1.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_nan0() {
+; CHECK-LABEL: @constant_fold_minnum_f32_nan0(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 2.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_nan1() {
+; CHECK-LABEL: @constant_fold_minnum_f32_nan1(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 2.0, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_nan_nan() {
+; CHECK-LABEL: @constant_fold_minnum_f32_nan_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+;
+ %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_p0_p0() {
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_p0_n0() {
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float 0.0, float -0.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_n0_p0() {
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_p0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float -0.0, float 0.0)
+ ret float %x
+}
+
+define float @constant_fold_minnum_f32_n0_n0() {
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %x = call float @llvm.minnum.f32(float -0.0, float -0.0)
+ ret float %x
+}
+
+define <4 x float> @constant_fold_minnum_v4f32() {
+; CHECK-LABEL: @constant_fold_minnum_v4f32(
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 5.000000e+00>
+;
+ %x = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+ ret <4 x float> %x
+}
+
+define double @constant_fold_minnum_f64() {
+; CHECK-LABEL: @constant_fold_minnum_f64(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %x = call double @llvm.minnum.f64(double 1.0, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_minnum_f64_nan0() {
+; CHECK-LABEL: @constant_fold_minnum_f64_nan0(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 2.0)
+ ret double %x
+}
+
+define double @constant_fold_minnum_f64_nan1() {
+; CHECK-LABEL: @constant_fold_minnum_f64_nan1(
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %x = call double @llvm.minnum.f64(double 2.0, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define double @constant_fold_minnum_f64_nan_nan() {
+; CHECK-LABEL: @constant_fold_minnum_f64_nan_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+;
+ %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000)
+ ret double %x
+}
+
+define float @canonicalize_constant_minnum_f32(float %x) {
+; CHECK-LABEL: @canonicalize_constant_minnum_f32(
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: ret float [[Y]]
+;
+ %y = call float @llvm.minnum.f32(float 1.0, float %x)
+ ret float %y
+}
+
+define float @minnum_f32_nan_val(float %x) {
+; CHECK-LABEL: @minnum_f32_nan_val(
+; CHECK-NEXT: ret float [[X:%.*]]
+;
+ %y = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %x)
+ ret float %y
+}
+
+define float @minnum_f32_val_nan(float %x) {
+; CHECK-LABEL: @minnum_f32_val_nan(
+; CHECK-NEXT: ret float [[X:%.*]]
+;
+ %y = call float @llvm.minnum.f32(float %x, float 0x7FF8000000000000)
+ ret float %y
+}
+
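+; minnum(minnum(X, 0.0), 1.0) is just minnum(X, 0.0): the inner result is never greater than 0.0
+; (it is 0.0 even for a NaN X), so the outer min with 1.0 changes nothing.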
+define float @minnum_f32_1_minnum_val_p0(float %x) {
+; CHECK-LABEL: @minnum_f32_1_minnum_val_p0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minnum.f32(float %x, float 0.0)
+ %z = call float @llvm.minnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minnum_f32_1_minnum_p0_val_fast(float %x) {
+; CHECK-LABEL: @minnum_f32_1_minnum_p0_val_fast(
+; CHECK-NEXT: [[RES:%.*]] = call fast float @llvm.minnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minnum.f32(float 0.0, float %x)
+ %z = call fast float @llvm.minnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minnum_f32_1_minnum_p0_val_nnan_ninf(float %x) {
+; CHECK-LABEL: @minnum_f32_1_minnum_p0_val_nnan_ninf(
+; CHECK-NEXT: [[RES:%.*]] = call nnan ninf float @llvm.minnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minnum.f32(float 0.0, float %x)
+ %z = call nnan ninf float @llvm.minnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define float @minnum_f32_p0_minnum_val_n0(float %x) {
+; CHECK-LABEL: @minnum_f32_p0_minnum_val_n0(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minnum.f32(float %x, float -0.0)
+ %z = call float @llvm.minnum.f32(float %y, float 0.0)
+ ret float %z
+}
+
+define float @minnum_f32_1_minnum_p0_val(float %x) {
+; CHECK-LABEL: @minnum_f32_1_minnum_p0_val(
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.minnum.f32(float %x, float 0.000000e+00)
+; CHECK-NEXT: ret float [[RES]]
+ %y = call float @llvm.minnum.f32(float 0.0, float %x)
+ %z = call float @llvm.minnum.f32(float %y, float 1.0)
+ ret float %z
+}
+
+define <2 x float> @minnum_f32_1_minnum_val_p0_val_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @minnum_f32_1_minnum_val_p0_val_v2f32(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+; CHECK-NEXT: ret <2 x float> [[RES]]
+ %y = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
+ %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %y, <2 x float><float 1.0, float 1.0>)
+ ret <2 x float> %z
+}
+
+define float @minnum4(float %x, float %y, float %z, float %w) {
+; CHECK-LABEL: @minnum4(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[Z:%.*]], float [[W:%.*]])
+; CHECK-NEXT: [[C:%.*]] = call float @llvm.minnum.f32(float [[A]], float [[B]])
+; CHECK-NEXT: ret float [[C]]
+;
+ %a = call float @llvm.minnum.f32(float %x, float %y)
+ %b = call float @llvm.minnum.f32(float %z, float %w)
+ %c = call float @llvm.minnum.f32(float %a, float %b)
+ ret float %c
+}
+
+define float @minnum_x_maxnum_x_y(float %x, float %y) {
+; CHECK-LABEL: @minnum_x_maxnum_x_y(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[A]])
+; CHECK-NEXT: ret float [[B]]
+;
+ %a = call float @llvm.maxnum.f32(float %x, float %y)
+ %b = call float @llvm.minnum.f32(float %x, float %a)
+ ret float %b
+}
+
+define float @maxnum_x_minnum_x_y(float %x, float %y) {
+; CHECK-LABEL: @maxnum_x_minnum_x_y(
+; CHECK-NEXT: [[A:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[A]])
+; CHECK-NEXT: ret float [[B]]
+;
+ %a = call float @llvm.minnum.f32(float %x, float %y)
+ %b = call float @llvm.maxnum.f32(float %x, float %a)
+ ret float %b
+}
+
+; PR37405 - https://bugs.llvm.org/show_bug.cgi?id=37405
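+; minnum(-X, -Y) is equivalent to -maxnum(X, Y), so the two fsubs can be replaced by a single negation of the maxnum result.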
+
+define double @neg_neg(double %x, double %y) {
+; CHECK-LABEL: @neg_neg(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maxnum.f64(double [[X:%.*]], double [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minnum.f64(double %negx, double %negy)
+ ret double %r
+}
+
+; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
+; Also, make sure this works with vectors.
+
+define <2 x double> @neg_neg_vec_fmf(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @neg_neg_vec_fmf(
+; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf <2 x double> @llvm.maxnum.v2f64(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <2 x double> [[R]]
+;
+ %negx = fsub reassoc <2 x double> <double -0.0, double -0.0>, %x
+ %negy = fsub fast <2 x double> <double -0.0, double -0.0>, %y
+ %r = call nnan ninf <2 x double> @llvm.minnum.v2f64(<2 x double> %negx, <2 x double> %negy)
+ ret <2 x double> %r
+}
+
+; 1 extra use of an intermediate value should still allow the fold,
+; but 2 would require more instructions than we started with.
+
+declare void @use(double)
+define double @neg_neg_extra_use_x(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maxnum.f64(double [[X]], double [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(double [[NEGX]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minnum.f64(double %negx, double %negy)
+ call void @use(double %negx)
+ ret double %r
+}
+
+define double @neg_neg_extra_use_y(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_y(
+; CHECK-NEXT: [[NEGY:%.*]] = fsub double -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.maxnum.f64(double [[X:%.*]], double [[Y]])
+; CHECK-NEXT: [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT: call void @use(double [[NEGY]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minnum.f64(double %negx, double %negy)
+ call void @use(double %negy)
+ ret double %r
+}
+
+define double @neg_neg_extra_use_x_and_y(double %x, double %y) {
+; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
+; CHECK-NEXT: [[NEGX:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT: [[NEGY:%.*]] = fsub double -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call double @llvm.minnum.f64(double [[NEGX]], double [[NEGY]])
+; CHECK-NEXT: call void @use(double [[NEGX]])
+; CHECK-NEXT: call void @use(double [[NEGY]])
+; CHECK-NEXT: ret double [[R]]
+;
+ %negx = fsub double -0.0, %x
+ %negy = fsub double -0.0, %y
+ %r = call double @llvm.minnum.f64(double %negx, double %negy)
+ call void @use(double %negx)
+ call void @use(double %negy)
+ ret double %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/misc-2002.ll b/llvm/test/Transforms/InstCombine/misc-2002.ll
new file mode 100644
index 00000000000..1c44e17edbd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/misc-2002.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @hang_2002-03-11(i32 %X) {
+; CHECK-LABEL: @hang_2002-03-11(
+; CHECK-NEXT: ret void
+;
+ %reg117 = add i32 %X, 0
+ ret void
+}
+
+; Instcombine was missing a check, which caused it to make illegal transformations
+; in some cases. Here, it transformed the sub into an add:
+
+define i32 @sub_failure_2002-05-14(i32 %i, i32 %j) {
+; CHECK-LABEL: @sub_failure_2002-05-14(
+; CHECK-NEXT: [[A:%.*]] = mul i32 %i, %j
+; CHECK-NEXT: [[B:%.*]] = sub i32 2, [[A]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = mul i32 %i, %j
+ %B = sub i32 2, %A
+ ret i32 %B
+}
+
+; This testcase was incorrectly getting completely eliminated. There should be
+; SOME instruction named %c here, even if it's a bitwise and.
+
+define i64 @cast_test_2002-08-02(i64 %A) {
+; CHECK-LABEL: @cast_test_2002-08-02(
+; CHECK-NEXT: [[C2:%.*]] = and i64 %A, 255
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = trunc i64 %A to i8
+ %c2 = zext i8 %c1 to i64
+ ret i64 %c2
+}
+
+define i32 @missed_const_prop_2002-12-05(i32 %A) {
+; CHECK-LABEL: @missed_const_prop_2002-12-05(
+; CHECK-NEXT: ret i32 0
+;
+ %A.neg = sub i32 0, %A
+ %.neg = sub i32 0, 1
+ %X = add i32 %.neg, 1
+ %Y.neg.ra = add i32 %A, %X
+ %r = add i32 %A.neg, %Y.neg.ra
+ ret i32 %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
new file mode 100644
index 00000000000..fcff725cdf6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
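+; Both operands are masked to 3 bits, so the product fits in 6 bits; that is why the mul becomes
+; nuw nsw, the shl nuw, and the ashr exact.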
+define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[A:%.*]] = and i32 %x, 7
+; CHECK-NEXT: [[B:%.*]] = and i32 %y, 7
+; CHECK-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = shl nuw i32 [[C]], 26
+; CHECK-NEXT: [[E:%.*]] = ashr exact i32 [[D]], 26
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %a = and i32 %x, 7
+ %b = and i32 %y, 7
+ %c = mul i32 %a, %b
+ %d = shl i32 %c, 26
+ %e = ashr i32 %d, 26
+ ret i32 %e
+}
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
new file mode 100644
index 00000000000..b1dc9d51332
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @pow2_multiplier(i32 %A) {
+; CHECK-LABEL: @pow2_multiplier(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = mul i32 %A, 2
+ ret i32 %B
+}
+
+define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) {
+; CHECK-LABEL: @pow2_multiplier_vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %B = mul <2 x i32> %A, <i32 8, i32 8>
+ ret <2 x i32> %B
+}
+
+define i8 @combine_shl(i8 %A) {
+; CHECK-LABEL: @combine_shl(
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[A:%.*]], 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = mul i8 %A, 8
+ %C = mul i8 %B, 8
+ ret i8 %C
+}
+
+define i32 @neg(i32 %i) {
+; CHECK-LABEL: @neg(
+; CHECK-NEXT: [[TMP:%.*]] = sub i32 0, [[I:%.*]]
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = mul i32 %i, -1
+ ret i32 %tmp
+}
+
+; Use the sign-bit as a mask:
+; (zext (A < 0)) * B --> (A >> 31) & B
+
+define i32 @test10(i32 %a, i32 %b) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %c = icmp slt i32 %a, 0
+ %d = zext i1 %c to i32
+ %e = mul i32 %d, %b
+ ret i32 %e
+}
+
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %c = icmp sle i32 %a, -1
+ %d = zext i1 %c to i32
+ %e = mul i32 %d, %b
+ ret i32 %e
+}
+
+declare void @use32(i32)
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[A_LOBIT]])
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %c = icmp ugt i32 %a, 2147483647
+ %d = zext i1 %c to i32
+ %e = mul i32 %d, %b
+ call void @use32(i32 %d)
+ ret i32 %e
+}
+
+; rdar://7293527
+define i32 @test15(i32 %A, i32 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[M:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %shl = shl i32 1, %B
+ %m = mul i32 %shl, %A
+ ret i32 %m
+}
+
+; X * Y (when Y is a boolean) --> Y ? X : 0
+
+define i32 @mul_bool(i32 %x, i1 %y) {
+; CHECK-LABEL: @mul_bool(
+; CHECK-NEXT: [[M:%.*]] = select i1 [[Y:%.*]], i32 [[X:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+ %z = zext i1 %y to i32
+ %m = mul i32 %x, %z
+ ret i32 %m
+}
+
+; Commute and test vector type.
+
+define <2 x i32> @mul_bool_vec(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @mul_bool_vec(
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %z = zext <2 x i1> %y to <2 x i32>
+ %m = mul <2 x i32> %x, %z
+ ret <2 x i32> %m
+}
+
+define <2 x i32> @mul_bool_vec_commute(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @mul_bool_vec_commute(
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+ %z = zext <2 x i1> %y to <2 x i32>
+ %m = mul <2 x i32> %z, %x
+ ret <2 x i32> %m
+}
+
+; (A >>u 31) * B --> (A >>s 31) & B
+
+define i32 @signbit_mul(i32 %a, i32 %b) {
+; CHECK-LABEL: @signbit_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %d = lshr i32 %a, 31
+ %e = mul i32 %d, %b
+ ret i32 %e
+}
+
+define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
+; CHECK-LABEL: @signbit_mul_commute_extra_use(
+; CHECK-NEXT: [[D:%.*]] = lshr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[D]])
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %d = lshr i32 %a, 31
+ %e = mul i32 %b, %d
+ call void @use32(i32 %d)
+ ret i32 %e
+}
+
+; (A >>u 31) * B --> (A >>s 31) & B
+
+define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @signbit_mul_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %d = lshr <2 x i32> %a, <i32 31, i32 31>
+ %e = mul <2 x i32> %d, %b
+ ret <2 x i32> %e
+}
+
+define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @signbit_mul_vec_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %d = lshr <2 x i32> %a, <i32 31, i32 31>
+ %e = mul <2 x i32> %b, %d
+ ret <2 x i32> %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i32 0
+;
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+ %E = mul i32 %C, %D
+ %F = and i32 %E, 16
+ ret i32 %F
+}
+
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: ret i32 0
+;
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+
+; It would be nice if we also started proving that this doesn't overflow.
+ %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+ %F = extractvalue {i32, i1} %E, 0
+ %G = extractvalue {i32, i1} %E, 1
+ call void @use(i1 %G)
+ %H = and i32 %F, 16
+ ret i32 %H
+}
+
+define <2 x i64> @test20(<2 x i64> %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i64> [[A:%.*]], <i64 3, i64 2>
+; CHECK-NEXT: [[C:%.*]] = add <2 x i64> [[TMP1]], <i64 36, i64 28>
+; CHECK-NEXT: ret <2 x i64> [[C]]
+;
+ %B = add <2 x i64> %A, <i64 12, i64 14>
+ %C = mul <2 x i64> %B, <i64 3, i64 2>
+ ret <2 x i64> %C
+}
+
+define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %C = mul <2 x i1> %A, %B
+ ret <2 x i1> %C
+}
+
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[B:%.*]] = sub nsw i32 0, [[A:%.*]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = mul nsw i32 %A, -1
+ ret i32 %B
+}
+
+define i32 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C:%.*]] = mul nuw i32 [[A:%.*]], 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl nuw i32 %A, 1
+ %C = mul nuw i32 %B, 3
+ ret i32 %C
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[C:%.*]] = mul nsw i32 [[A:%.*]], 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl nsw i32 %A, 1
+ %C = mul nsw i32 %B, 3
+ ret i32 %C
+}
+
+define i32 @neg_neg_mul(i32 %A, i32 %B) {
+; CHECK-LABEL: @neg_neg_mul(
+; CHECK-NEXT: [[E:%.*]] = mul i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %C = sub i32 0, %A
+ %D = sub i32 0, %B
+ %E = mul i32 %C, %D
+ ret i32 %E
+}
+
+define i32 @neg_neg_mul_nsw(i32 %A, i32 %B) {
+; CHECK-LABEL: @neg_neg_mul_nsw(
+; CHECK-NEXT: [[E:%.*]] = mul nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %C = sub nsw i32 0, %A
+ %D = sub nsw i32 0, %B
+ %E = mul nsw i32 %C, %D
+ ret i32 %E
+}
+
+define i124 @neg_neg_mul_apint(i124 %A, i124 %B) {
+; CHECK-LABEL: @neg_neg_mul_apint(
+; CHECK-NEXT: [[E:%.*]] = mul i124 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i124 [[E]]
+;
+ %C = sub i124 0, %A
+ %D = sub i124 0, %B
+ %E = mul i124 %C, %D
+ ret i124 %E
+}
+
+define i32 @neg_mul_constant(i32 %A) {
+; CHECK-LABEL: @neg_mul_constant(
+; CHECK-NEXT: [[E:%.*]] = mul i32 [[A:%.*]], -7
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %C = sub i32 0, %A
+ %E = mul i32 %C, 7
+ ret i32 %E
+}
+
+define i55 @neg_mul_constant_apint(i55 %A) {
+; CHECK-LABEL: @neg_mul_constant_apint(
+; CHECK-NEXT: [[E:%.*]] = mul i55 [[A:%.*]], -7
+; CHECK-NEXT: ret i55 [[E]]
+;
+ %C = sub i55 0, %A
+ %E = mul i55 %C, 7
+ ret i55 %E
+}
+
+define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) {
+; CHECK-LABEL: @neg_mul_constant_vec(
+; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]], <i8 -5, i8 -5, i8 -5>
+; CHECK-NEXT: ret <3 x i8> [[B]]
+;
+ %A = sub <3 x i8> zeroinitializer, %a
+ %B = mul <3 x i8> %A, <i8 5, i8 5, i8 5>
+ ret <3 x i8> %B
+}
+
+define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) {
+; CHECK-LABEL: @neg_mul_constant_vec_weird(
+; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]], <i4 -5, i4 -5, i4 -5>
+; CHECK-NEXT: ret <3 x i4> [[B]]
+;
+ %A = sub <3 x i4> zeroinitializer, %a
+ %B = mul <3 x i4> %A, <i4 5, i4 5, i4 5>
+ ret <3 x i4> %B
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[D:%.*]] = shl nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = shl nsw i32 1, %B
+ %D = mul nsw i32 %A, %C
+ ret i32 %D
+}
+
+define i32 @test27(i32 %A, i32 %B) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[D:%.*]] = shl nuw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = shl i32 1, %B
+ %D = mul nuw i32 %A, %C
+ ret i32 %D
+}
+
+define i32 @test28(i32 %A) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[A:%.*]]
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 1, %A
+ %C = mul nsw i32 %B, %B
+ ret i32 %C
+}
+
+define i64 @test29(i31 %A, i31 %B) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[C:%.*]] = sext i31 [[A:%.*]] to i64
+; CHECK-NEXT: [[D:%.*]] = sext i31 [[B:%.*]] to i64
+; CHECK-NEXT: [[E:%.*]] = mul nsw i64 [[C]], [[D]]
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %C = sext i31 %A to i64
+ %D = sext i31 %B to i64
+ %E = mul i64 %C, %D
+ ret i64 %E
+}
+
+define i64 @test30(i32 %A, i32 %B) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[D:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[E:%.*]] = mul nuw i64 [[C]], [[D]]
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %C = zext i32 %A to i64
+ %D = zext i32 %B to i64
+ %E = mul i64 %C, %D
+ ret i64 %E
+}
+
+@PR22087 = external global i32
+define i32 @test31(i32 %V) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[V:%.*]], zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32)
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %mul = mul i32 %V, shl (i32 1, i32 zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32))
+ ret i32 %mul
+}
+
+define i32 @test32(i32 %X) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %mul = mul nsw i32 %X, -2147483648
+ ret i32 %mul
+}
+
+define <2 x i32> @test32vec(<2 x i32> %X) {
+; CHECK-LABEL: @test32vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %mul = mul nsw <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
+ ret <2 x i32> %mul
+}
+
+define i32 @test33(i32 %X) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[X:%.*]], 30
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %mul = mul nsw i32 %X, 1073741824
+ ret i32 %mul
+}
+
+define <2 x i32> @test33vec(<2 x i32> %X) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %mul = mul nsw <2 x i32> %X, <i32 1073741824, i32 1073741824>
+ ret <2 x i32> %mul
+}
+
+define i128 @test34(i128 %X) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i128 [[X:%.*]], 1
+; CHECK-NEXT: ret i128 [[MUL]]
+;
+ %mul = mul nsw i128 %X, 2
+ ret i128 %mul
+}
+
+define i32 @test_mul_canonicalize_op0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_op0(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %neg = sub i32 0, %x
+ %mul = mul i32 %neg, %y
+ ret i32 %mul
+}
+
+define i32 @test_mul_canonicalize_op1(i32 %x, i32 %z) {
+; CHECK-LABEL: @test_mul_canonicalize_op1(
+; CHECK-NEXT: [[Y:%.*]] = mul i32 [[Z:%.*]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %y = mul i32 %z, 3
+ %neg = sub i32 0, %x
+ %mul = mul i32 %y, %neg
+ ret i32 %mul
+}
+
+define i32 @test_mul_canonicalize_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %neg = sub nsw i32 0, %x
+ %mul = mul nsw i32 %neg, %y
+ ret i32 %mul
+}
+
+define <2 x i32> @test_mul_canonicalize_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test_mul_canonicalize_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %neg = sub <2 x i32> <i32 0, i32 0>, %x
+ %mul = mul <2 x i32> %neg, %y
+ ret <2 x i32> %mul
+}
+
+define i32 @test_mul_canonicalize_multiple_uses(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_multiple_uses(
+; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL]], [[NEG]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+ %neg = sub i32 0, %x
+ %mul = mul i32 %neg, %y
+ %mul2 = mul i32 %mul, %neg
+ ret i32 %mul2
+}
+
+@X = global i32 5
+
+define i64 @test_mul_canonicalize_neg_is_not_undone(i64 %L1) {
+; Check that we do not undo the canonicalization of 0 - (X * Y) when Y is a
+; constant expression.
+; CHECK-LABEL: @test_mul_canonicalize_neg_is_not_undone(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[L1:%.*]], ptrtoint (i32* @X to i64)
+; CHECK-NEXT: [[B4:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: ret i64 [[B4]]
+;
+ %v1 = ptrtoint i32* @X to i64
+ %B8 = sub i64 0, %v1
+ %B4 = mul i64 %B8, %L1
+ ret i64 %B4
+}
diff --git a/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
new file mode 100644
index 00000000000..4e5b2100f34
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
@@ -0,0 +1,112 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+
+define i32 @test_as0(i32 addrspace(0)* %a) {
+; CHECK-LABEL: @test_as0(
+; CHECK: %arrayidx = getelementptr i32, i32* %a, i32 1
+ %arrayidx = getelementptr i32, i32 addrspace(0)* %a, i64 1
+ %y = load i32, i32 addrspace(0)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as1(i32 addrspace(1)* %a) {
+; CHECK-LABEL: @test_as1(
+; CHECK: %arrayidx = getelementptr i32, i32 addrspace(1)* %a, i64 1
+ %arrayidx = getelementptr i32, i32 addrspace(1)* %a, i32 1
+ %y = load i32, i32 addrspace(1)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as2(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_as2(
+; CHECK: %arrayidx = getelementptr i32, i32 addrspace(2)* %a, i8 1
+ %arrayidx = getelementptr i32, i32 addrspace(2)* %a, i32 1
+ %y = load i32, i32 addrspace(2)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_as3(i32 addrspace(3)* %a) {
+; CHECK-LABEL: @test_as3(
+; CHECK: %arrayidx = getelementptr i32, i32 addrspace(3)* %a, i16 1
+ %arrayidx = getelementptr i32, i32 addrspace(3)* %a, i32 1
+ %y = load i32, i32 addrspace(3)* %arrayidx, align 4
+ ret i32 %y
+}
+
+define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) {
+; CHECK-LABEL: @test_combine_ptrtoint(
+; CHECK-NEXT: %y = load i32, i32 addrspace(2)* %a
+; CHECK-NEXT: ret i32 %y
+ %cast = ptrtoint i32 addrspace(2)* %a to i8
+ %castback = inttoptr i8 %cast to i32 addrspace(2)*
+ %y = load i32, i32 addrspace(2)* %castback, align 4
+ ret i32 %y
+}
+
+define i8 @test_combine_inttoptr(i8 %a) {
+; CHECK-LABEL: @test_combine_inttoptr(
+; CHECK-NEXT: ret i8 %a
+ %cast = inttoptr i8 %a to i32 addrspace(2)*
+ %castback = ptrtoint i32 addrspace(2)* %cast to i8
+ ret i8 %castback
+}
+
+define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) {
+; CHECK-LABEL: @test_combine_vector_ptrtoint(
+; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0
+; CHECK-NEXT: %y = load i32, i32 addrspace(2)* %p, align 4
+; CHECK-NEXT: ret i32 %y
+ %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8>
+ %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*>
+ %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0
+ %y = load i32, i32 addrspace(2)* %p, align 4
+ ret i32 %y
+}
+
+define <2 x i8> @test_combine_vector_inttoptr(<2 x i8> %a) {
+; CHECK-LABEL: @test_combine_vector_inttoptr(
+; CHECK-NEXT: ret <2 x i8> %a
+ %cast = inttoptr <2 x i8> %a to <2 x i32 addrspace(2)*>
+ %castback = ptrtoint <2 x i32 addrspace(2)*> %cast to <2 x i8>
+ ret <2 x i8> %castback
+}
+
+; Check that the GEP index is changed to the address space integer type (i64 -> i8)
+define i32 addrspace(2)* @shrink_gep_constant_index_64_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as2(
+; CHECK-NEXT: getelementptr i32, i32 addrspace(2)* %p, i8 1
+ %ret = getelementptr i32, i32 addrspace(2)* %p, i64 1
+ ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_constant_index_32_as2(i32 addrspace(2)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_32_as2(
+; CHECK-NEXT: getelementptr i32, i32 addrspace(2)* %p, i8 1
+ %ret = getelementptr i32, i32 addrspace(2)* %p, i32 1
+ ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(3)* @shrink_gep_constant_index_64_as3(i32 addrspace(3)* %p) {
+; CHECK-LABEL: @shrink_gep_constant_index_64_as3(
+; CHECK-NEXT: getelementptr i32, i32 addrspace(3)* %p, i16 1
+ %ret = getelementptr i32, i32 addrspace(3)* %p, i64 1
+ ret i32 addrspace(3)* %ret
+}
+
+define i32 addrspace(2)* @shrink_gep_variable_index_64_as2(i32 addrspace(2)* %p, i64 %idx) {
+; CHECK-LABEL: @shrink_gep_variable_index_64_as2(
+; CHECK-NEXT: %1 = trunc i64 %idx to i8
+; CHECK-NEXT: getelementptr i32, i32 addrspace(2)* %p, i8 %1
+ %ret = getelementptr i32, i32 addrspace(2)* %p, i64 %idx
+ ret i32 addrspace(2)* %ret
+}
+
+define i32 addrspace(1)* @grow_gep_variable_index_8_as1(i32 addrspace(1)* %p, i8 %idx) {
+; CHECK-LABEL: @grow_gep_variable_index_8_as1(
+; CHECK-NEXT: %1 = sext i8 %idx to i64
+; CHECK-NEXT: getelementptr i32, i32 addrspace(1)* %p, i64 %1
+ %ret = getelementptr i32, i32 addrspace(1)* %p, i8 %idx
+ ret i32 addrspace(1)* %ret
+}
+
diff --git a/llvm/test/Transforms/InstCombine/multi-use-or.ll b/llvm/test/Transforms/InstCombine/multi-use-or.ll
new file mode 100644
index 00000000000..8b90e0d7f61
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/multi-use-or.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | grep "fadd double .sx, .sy"
+; The 'or' has multiple uses; make sure that this doesn't prevent instcombine
+; from propagating the extends to the truncs.
+
+define double @ScaleObjectAdd(double %sx, double %sy, double %sz) nounwind {
+entry:
+ %sx34 = bitcast double %sx to i64 ; <i64> [#uses=1]
+ %sx3435 = zext i64 %sx34 to i192 ; <i192> [#uses=1]
+ %sy22 = bitcast double %sy to i64 ; <i64> [#uses=1]
+ %sy2223 = zext i64 %sy22 to i192 ; <i192> [#uses=1]
+ %sy222324 = shl i192 %sy2223, 128 ; <i192> [#uses=1]
+ %sy222324.ins = or i192 %sx3435, %sy222324 ; <i192> [#uses=1]
+
+
+ %a = trunc i192 %sy222324.ins to i64 ; <i64> [#uses=1]
+ %b = bitcast i64 %a to double ; <double> [#uses=1]
+ %c = lshr i192 %sy222324.ins, 128 ; <i192> [#uses=1]
+ %d = trunc i192 %c to i64 ; <i64> [#uses=1]
+ %e = bitcast i64 %d to double ; <double> [#uses=1]
+ %f = fadd double %b, %e
+
+; ret double %e
+ ret double %f
+}
diff --git a/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
new file mode 100644
index 00000000000..28509df6d2f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -data-layout="E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" | FileCheck %s
+
+define void @PR35618(i64* %st1, double* %st2) {
+; CHECK-LABEL: @PR35618(
+; CHECK-NEXT: [[Y1:%.*]] = alloca double, align 8
+; CHECK-NEXT: [[Z1:%.*]] = alloca double, align 8
+; CHECK-NEXT: [[LD1:%.*]] = load double, double* [[Y1]], align 8
+; CHECK-NEXT: [[LD2:%.*]] = load double, double* [[Z1]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt double [[LD1]], [[LD2]]
+; CHECK-NEXT: [[TMP121:%.*]] = select i1 [[TMP10]], double [[LD1]], double [[LD2]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[ST1:%.*]] to double*
+; CHECK-NEXT: store double [[TMP121]], double* [[TMP1]], align 8
+; CHECK-NEXT: store double [[TMP121]], double* [[ST2:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %y1 = alloca double
+ %z1 = alloca double
+ %ld1 = load double, double* %y1
+ %ld2 = load double, double* %z1
+ %tmp10 = fcmp olt double %ld1, %ld2
+ %sel = select i1 %tmp10, double* %y1, double* %z1
+ %tmp11 = bitcast double* %sel to i64*
+ %tmp12 = load i64, i64* %tmp11
+ store i64 %tmp12, i64* %st1
+ %bc = bitcast double* %st2 to i64*
+ store i64 %tmp12, i64* %bc
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/musttail-thunk.ll b/llvm/test/Transforms/InstCombine/musttail-thunk.ll
new file mode 100644
index 00000000000..2e8e3a7b9c2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/musttail-thunk.ll
@@ -0,0 +1,33 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; RUN: opt -debugify-each -instcombine -S < %s | FileCheck %s
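+; The second RUN line repeats the check with -debugify-each, which wraps each
+; pass with synthetic debug info, so the same output is expected in that
+; configuration as well.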
+
+; These are both direct calls, but make sure instcombine leaves the casts
+; alone.
+
+define i32 @call_thunk(i32 %x, i32 %y) {
+ %r = call i32 bitcast (void (i32, ...)* @inc_first_arg_thunk to i32 (i32, i32)*)(i32 %x, i32 %y)
+ ret i32 %r
+}
+
+; CHECK-LABEL: define i32 @call_thunk(i32 %x, i32 %y)
+; CHECK: %r = call i32 bitcast (void (i32, ...)* @inc_first_arg_thunk to i32 (i32, i32)*)(i32 %x, i32 %y)
+; CHECK: ret i32 %r
+
+define internal void @inc_first_arg_thunk(i32 %arg1, ...) #0 {
+entry:
+ %inc = add i32 %arg1, 1
+ musttail call void (i32, ...) bitcast (i32 (i32, i32)* @plus to void (i32, ...)*)(i32 %inc, ...)
+ ret void
+}
+
+; CHECK-LABEL: define internal void @inc_first_arg_thunk(i32 %arg1, ...) #0
+; CHECK: %inc = add i32 %arg1, 1
+; CHECK: musttail call void (i32, ...) bitcast (i32 (i32, i32)* @plus to void (i32, ...)*)(i32 %inc, ...)
+; CHECK: ret void
+
+define internal i32 @plus(i32 %x, i32 %y) {
+ %r = add i32 %x, %y
+ ret i32 %r
+}
+
+attributes #0 = { "thunk" }
diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll
new file mode 100644
index 00000000000..8caf93d690b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/narrow-math.ll
@@ -0,0 +1,630 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @callee()
+
+declare void @use(i64)
+
+define i64 @sext_sext_add(i32 %A) {
+; CHECK-LABEL: @sext_sext_add(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i32 %A, 7
+ %C = ashr i32 %A, 9
+ %D = sext i32 %B to i64
+ %E = sext i32 %C to i64
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+; Negative test
+
+define i64 @sext_zext_add_mismatched_exts(i32 %A) {
+; CHECK-LABEL: @sext_zext_add_mismatched_exts(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = lshr i32 [[A]], 9
+; CHECK-NEXT: [[D:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[C]] to i64
+; CHECK-NEXT: [[F:%.*]] = add nsw i64 [[D]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i32 %A, 7
+ %C = lshr i32 %A, 9
+ %D = sext i32 %B to i64
+ %E = zext i32 %C to i64
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+; Negative test
+
+define i64 @sext_sext_add_mismatched_types(i16 %A, i32 %x) {
+; CHECK-LABEL: @sext_sext_add_mismatched_types(
+; CHECK-NEXT: [[B:%.*]] = ashr i16 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[X:%.*]], 9
+; CHECK-NEXT: [[D:%.*]] = sext i16 [[B]] to i64
+; CHECK-NEXT: [[E:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: [[F:%.*]] = add nsw i64 [[D]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i16 %A, 7
+ %C = ashr i32 %x, 9
+ %D = sext i16 %B to i64
+ %E = sext i32 %C to i64
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+define i64 @sext_sext_add_extra_use1(i32 %A) {
+; CHECK-LABEL: @sext_sext_add_extra_use1(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
+; CHECK-NEXT: [[D:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: call void @use(i64 [[D]])
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i32 %A, 7
+ %C = ashr i32 %A, 9
+ %D = sext i32 %B to i64
+ call void @use(i64 %D)
+ %E = sext i32 %C to i64
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+define i64 @sext_sext_add_extra_use2(i32 %A) {
+; CHECK-LABEL: @sext_sext_add_extra_use2(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
+; CHECK-NEXT: [[E:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: call void @use(i64 [[E]])
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[B]], [[C]]
+; CHECK-NEXT: [[F:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i32 %A, 7
+ %C = ashr i32 %A, 9
+ %D = sext i32 %B to i64
+ %E = sext i32 %C to i64
+ call void @use(i64 %E)
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+; Negative test - if both extends have extra uses, we need an extra instruction.
+
+define i64 @sext_sext_add_extra_use3(i32 %A) {
+; CHECK-LABEL: @sext_sext_add_extra_use3(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A]], 9
+; CHECK-NEXT: [[D:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: call void @use(i64 [[D]])
+; CHECK-NEXT: [[E:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: call void @use(i64 [[E]])
+; CHECK-NEXT: [[F:%.*]] = add nsw i64 [[D]], [[E]]
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = ashr i32 %A, 7
+ %C = ashr i32 %A, 9
+ %D = sext i32 %B to i64
+ call void @use(i64 %D)
+ %E = sext i32 %C to i64
+ call void @use(i64 %E)
+ %F = add i64 %D, %E
+ ret i64 %F
+}
+
+define i64 @test1(i32 %V) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %call2 = call i32 @callee(), !range !0
+ %zext1 = sext i32 %call1 to i64
+ %zext2 = sext i32 %call2 to i64
+ %add = add i64 %zext1, %zext2
+ ret i64 %add
+}
+
+define i64 @test2(i32 %V) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %call2 = call i32 @callee(), !range !0
+ %add = add i32 %call1, %call2
+ %zext = sext i32 %add to i64
+ ret i64 %zext
+}
+
+define i64 @test3(i32 %V) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[NARROW:%.*]] = mul nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %call2 = call i32 @callee(), !range !0
+ %zext1 = sext i32 %call1 to i64
+ %zext2 = sext i32 %call2 to i64
+ %add = mul i64 %zext1, %zext2
+ ret i64 %add
+}
+
+define i64 @test4(i32 %V) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[ADD:%.*]] = mul nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %call2 = call i32 @callee(), !range !0
+ %add = mul i32 %call1, %call2
+ %zext = sext i32 %add to i64
+ ret i64 %zext
+}
+
+define i64 @test5(i32 %V) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], 1073741823
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %ashr = ashr i32 %V, 1
+ %sext = sext i32 %ashr to i64
+ %add = add i64 %sext, 1073741823
+ ret i64 %add
+}
+
+; Negative test - extra use means we'd have more instructions than we started with.
+
+define i64 @sext_add_constant_extra_use(i32 %V) {
+; CHECK-LABEL: @sext_add_constant_extra_use(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ASHR]] to i64
+; CHECK-NEXT: call void @use(i64 [[SEXT]])
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SEXT]], 1073741823
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %ashr = ashr i32 %V, 1
+ %sext = sext i32 %ashr to i64
+ call void @use(i64 %sext)
+ %add = add i64 %sext, 1073741823
+ ret i64 %add
+}
+
+define <2 x i64> @test5_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test5_splat(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %add = add <2 x i64> %sext, <i64 1073741823, i64 1073741823>
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @test5_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test5_vec(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %add = add <2 x i64> %sext, <i64 1, i64 2>
+ ret <2 x i64> %add
+}
+
+define i64 @test6(i32 %V) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[ASHR]], -1073741824
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %ashr = ashr i32 %V, 1
+ %sext = sext i32 %ashr to i64
+ %add = add i64 %sext, -1073741824
+ ret i64 %add
+}
+
+define <2 x i64> @test6_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test6_splat(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %add = add <2 x i64> %sext, <i64 -1073741824, i64 -1073741824>
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @test6_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test6_vec(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %add = add <2 x i64> %sext, <i64 -1, i64 -2>
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @test6_vec2(<2 x i32> %V) {
+; CHECK-LABEL: @test6_vec2(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1>
+; CHECK-NEXT: [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %add = add <2 x i64> %sext, <i64 -1, i64 1>
+ ret <2 x i64> %add
+}
+
+define i64 @test7(i32 %V) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw i32 [[LSHR]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %lshr = lshr i32 %V, 1
+ %zext = zext i32 %lshr to i64
+ %add = add i64 %zext, 2147483647
+ ret i64 %add
+}
+
+define <2 x i64> @test7_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test7_splat(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %lshr = lshr <2 x i32> %V, <i32 1, i32 1>
+ %zext = zext <2 x i32> %lshr to <2 x i64>
+ %add = add <2 x i64> %zext, <i64 2147483647, i64 2147483647>
+ ret <2 x i64> %add
+}
+
+define <2 x i64> @test7_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test7_vec(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ADD]]
+;
+ %lshr = lshr <2 x i32> %V, <i32 1, i32 1>
+ %zext = zext <2 x i32> %lshr to <2 x i64>
+ %add = add <2 x i64> %zext, <i64 1, i64 2>
+ ret <2 x i64> %add
+}
+
+define i64 @test8(i32 %V) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 16
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw i32 [[ASHR]], 32767
+; CHECK-NEXT: [[MUL:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+ %ashr = ashr i32 %V, 16
+ %sext = sext i32 %ashr to i64
+ %mul = mul i64 %sext, 32767
+ ret i64 %mul
+}
+
+define <2 x i64> @test8_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test8_splat(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 32767>
+; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %mul = mul <2 x i64> %sext, <i64 32767, i64 32767>
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @test8_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test8_vec(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 16384>
+; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %mul = mul <2 x i64> %sext, <i64 32767, i64 16384>
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @test8_vec2(<2 x i32> %V) {
+; CHECK-LABEL: @test8_vec2(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 -32767>
+; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %mul = mul <2 x i64> %sext, <i64 32767, i64 -32767>
+ ret <2 x i64> %mul
+}
+
+define i64 @test9(i32 %V) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 16
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw i32 [[ASHR]], -32767
+; CHECK-NEXT: [[MUL:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+ %ashr = ashr i32 %V, 16
+ %sext = sext i32 %ashr to i64
+ %mul = mul i64 %sext, -32767
+ ret i64 %mul
+}
+
+define <2 x i64> @test9_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test9_splat(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -32767>
+; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %mul = mul <2 x i64> %sext, <i64 -32767, i64 -32767>
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @test9_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test9_vec(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -10>
+; CHECK-NEXT: [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 16, i32 16>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %mul = mul <2 x i64> %sext, <i64 -32767, i64 -10>
+ ret <2 x i64> %mul
+}
+
+define i64 @test10(i32 %V) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 16
+; CHECK-NEXT: [[NARROW:%.*]] = mul nuw i32 [[LSHR]], 65535
+; CHECK-NEXT: [[MUL:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[MUL]]
+;
+ %lshr = lshr i32 %V, 16
+ %zext = zext i32 %lshr to i64
+ %mul = mul i64 %zext, 65535
+ ret i64 %mul
+}
+
+define <2 x i64> @test10_splat(<2 x i32> %V) {
+; CHECK-LABEL: @test10_splat(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %lshr = lshr <2 x i32> %V, <i32 16, i32 16>
+ %zext = zext <2 x i32> %lshr to <2 x i64>
+ %mul = mul <2 x i64> %zext, <i64 65535, i64 65535>
+ ret <2 x i64> %mul
+}
+
+define <2 x i64> @test10_vec(<2 x i32> %V) {
+; CHECK-LABEL: @test10_vec(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 2>
+; CHECK-NEXT: [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[MUL]]
+;
+ %lshr = lshr <2 x i32> %V, <i32 16, i32 16>
+ %zext = zext <2 x i32> %lshr to <2 x i64>
+ %mul = mul <2 x i64> %zext, <i64 65535, i64 2>
+ ret <2 x i64> %mul
+}
+
+define i64 @test11(i32 %V) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT: [[NARROW:%.*]] = add nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[ADD:%.*]] = sext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+ %call1 = call i32 @callee(), !range !1
+ %call2 = call i32 @callee(), !range !1
+ %sext1 = sext i32 %call1 to i64
+ %sext2 = sext i32 %call2 to i64
+ %add = add i64 %sext1, %sext2
+ ret i64 %add
+}
+
+define i64 @test12(i32 %V) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT: [[NARROW:%.*]] = mul nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %call1 = call i32 @callee(), !range !1
+ %call2 = call i32 @callee(), !range !1
+ %sext1 = sext i32 %call1 to i64
+ %sext2 = sext i32 %call2 to i64
+ %add = mul i64 %sext1, %sext2
+ ret i64 %add
+}
+
+define i64 @test13(i32 %V) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !3
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %call1 = call i32 @callee(), !range !2
+ %call2 = call i32 @callee(), !range !3
+ %sext1 = sext i32 %call1 to i64
+ %sext2 = sext i32 %call2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+define i64 @test14(i32 %V) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %call1 = call i32 @callee(), !range !2
+ %call2 = call i32 @callee(), !range !0
+ %zext1 = zext i32 %call1 to i64
+ %zext2 = zext i32 %call2 to i64
+ %sub = sub i64 %zext1, %zext2
+ ret i64 %sub
+}
+
+define i64 @test15(i32 %V) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw i32 8, [[ASHR]]
+; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ashr = ashr i32 %V, 1
+ %sext = sext i32 %ashr to i64
+ %sub = sub i64 8, %sext
+ ret i64 %sub
+}
+
+define <2 x i64> @test15vec(<2 x i32> %V) {
+; CHECK-LABEL: @test15vec(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nsw <2 x i32> <i32 8, i32 8>, [[ASHR]]
+; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+ %sext = sext <2 x i32> %ashr to <2 x i64>
+ %sub = sub <2 x i64> <i64 8, i64 8>, %sext
+ ret <2 x i64> %sub
+}
+
+define i64 @test16(i32 %V) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw i32 -2, [[LSHR]]
+; CHECK-NEXT: [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %lshr = lshr i32 %V, 1
+ %zext = zext i32 %lshr to i64
+ %sub = sub i64 4294967294, %zext
+ ret i64 %sub
+}
+
+define <2 x i64> @test16vec(<2 x i32> %V) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[SUBCONV:%.*]] = sub nuw <2 x i32> <i32 -2, i32 -2>, [[LSHR]]
+; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %lshr = lshr <2 x i32> %V, <i32 1, i32 1>
+ %zext = zext <2 x i32> %lshr to <2 x i64>
+ %sub = sub <2 x i64> <i64 4294967294, i64 4294967294>, %zext
+ ret <2 x i64> %sub
+}
+
+; Negative test. Both have the same range so we can't guarantee the subtract
+; won't wrap.
+define i64 @test17(i32 %V) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[SEXT1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT: [[SEXT2:%.*]] = zext i32 [[CALL2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %call2 = call i32 @callee(), !range !0
+ %sext1 = zext i32 %call1 to i64
+ %sext2 = zext i32 %call2 to i64
+ %sub = sub i64 %sext1, %sext2
+ ret i64 %sub
+}
+
+; Negative test. The LHS is a large positive 32-bit number; the callee's range
+; can cause overflow.
+define i64 @test18(i32 %V) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[CALL1]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 2147481648, [[SEXT1]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %call1 = call i32 @callee(), !range !1
+ %sext1 = sext i32 %call1 to i64
+ %sub = sub i64 2147481648, %sext1
+ ret i64 %sub
+}
+
+; Negative test. The LHS is a large negative 32-bit number; the callee's range
+; can cause overflow.
+define i64 @test19(i32 %V) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 -2147481648, [[TMP1]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %call1 = call i32 @callee(), !range !0
+ %sext1 = sext i32 %call1 to i64
+ %sub = sub i64 -2147481648, %sext1
+ ret i64 %sub
+}
+
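+; The !range metadata below bounds the values @callee can return; those known
+; ranges are what allow the adds/subs/muls above to be narrowed with the
+; nuw/nsw flags.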
+!0 = !{ i32 0, i32 2000 }
+!1 = !{ i32 -2000, i32 0 }
+!2 = !{ i32 -512, i32 -255 }
+!3 = !{ i32 -128, i32 0 }
diff --git a/llvm/test/Transforms/InstCombine/narrow-switch.ll b/llvm/test/Transforms/InstCombine/narrow-switch.ll
new file mode 100644
index 00000000000..a8fa3e528db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/narrow-switch.ll
@@ -0,0 +1,262 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Vary legal integer types in data layout.
+; RUN: opt < %s -instcombine -S -data-layout=n32 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK32
+; RUN: opt < %s -instcombine -S -data-layout=n32:64 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK64
+
+define i32 @positive1(i64 %a) {
+; ALL-LABEL: @positive1(
+; ALL: switch i32
+; ALL-NEXT: i32 10, label %return
+; ALL-NEXT: i32 100, label %sw.bb1
+; ALL-NEXT: i32 1001, label %sw.bb2
+; ALL-NEXT: ]
+;
+entry:
+ %and = and i64 %a, 4294967295
+ switch i64 %and, label %sw.default [
+ i64 10, label %return
+ i64 100, label %sw.bb1
+ i64 1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @negative1(i64 %a) {
+; ALL-LABEL: @negative1(
+; ALL: switch i32
+; ALL-NEXT: i32 -10, label %return
+; ALL-NEXT: i32 -100, label %sw.bb1
+; ALL-NEXT: i32 -1001, label %sw.bb2
+; ALL-NEXT: ]
+;
+entry:
+ %or = or i64 %a, -4294967296
+ switch i64 %or, label %sw.default [
+ i64 -10, label %return
+ i64 -100, label %sw.bb1
+ i64 -1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
+
+; Make sure truncating a constant int wider than 64 bits doesn't trigger an
+; assertion.
+
+define i32 @trunc72to68(i72 %a) {
+; ALL-LABEL: @trunc72to68(
+; ALL: switch i68
+; ALL-NEXT: i68 10, label %return
+; ALL-NEXT: i68 100, label %sw.bb1
+; ALL-NEXT: i68 1001, label %sw.bb2
+; ALL-NEXT: ]
+;
+entry:
+ %and = and i72 %a, 295147905179352825855
+ switch i72 %and, label %sw.default [
+ i72 10, label %return
+ i72 100, label %sw.bb1
+ i72 1001, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %return
+
+sw.bb2:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+ ret i32 %retval.0
+}
+
+; Make sure to avoid assertion crashes and to use the pre-truncation type when
+; generating the sub constant expressions that lead to the recomputed
+; condition.
+; Truncating from i64 to i59 is allowed in 32-bit mode because both types are
+; illegal there.
+
+define void @trunc64to59(i64 %a) {
+; ALL-LABEL: @trunc64to59(
+; ALL-CHECK32: switch i59
+; ALL-CHECK32-NEXT: i59 0, label %sw.bb1
+; ALL-CHECK32-NEXT: i59 18717182647723699, label %sw.bb2
+; ALL-CHECK32-NEXT: ]
+; ALL-CHECK64: switch i64
+; ALL-CHECK64-NEXT: i64 0, label %sw.bb1
+; ALL-CHECK64-NEXT: i64 18717182647723699, label %sw.bb2
+; ALL-CHECK64-NEXT: ]
+;
+entry:
+ %tmp0 = and i64 %a, 15
+ %tmp1 = mul i64 %tmp0, -6425668444178048401
+ %tmp2 = add i64 %tmp1, 5170979678563097242
+ %tmp3 = mul i64 %tmp2, 1627972535142754813
+ switch i64 %tmp3, label %sw.default [
+ i64 847514119312061490, label %sw.bb1
+ i64 866231301959785189, label %sw.bb2
+ ]
+
+sw.bb1:
+ br label %sw.default
+
+sw.bb2:
+ br label %sw.default
+
+sw.default:
+ ret void
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=31260
+
+define i8 @PR31260(i8 %x) {
+; ALL-LABEL: @PR31260(
+; ALL-NEXT: entry:
+; ALL-NEXT: [[TMP0:%.*]] = trunc i8 %x to i2
+; ALL-NEXT: [[TRUNC:%.*]] = and i2 [[TMP0]], -2
+; ALL-NEXT: switch i2 [[TRUNC]], label %exit [
+; ALL-NEXT: i2 0, label %case126
+; ALL-NEXT: i2 -2, label %case124
+; ALL-NEXT: ]
+; ALL: exit:
+; ALL-NEXT: ret i8 1
+; ALL: case126:
+; ALL-NEXT: ret i8 3
+; ALL: case124:
+; ALL-NEXT: ret i8 5
+;
+entry:
+ %t4 = and i8 %x, 2
+ %t5 = add nsw i8 %t4, -126
+ switch i8 %t5, label %exit [
+ i8 -126, label %case126
+ i8 -124, label %case124
+ ]
+
+exit:
+ ret i8 1
+case126:
+ ret i8 3
+case124:
+ ret i8 5
+}
+
+; Make sure the arithmetic feeding the switch condition is evaluated on the
+; original type.
+define i32 @trunc32to16(i32 %a0) #0 {
+; ALL-LABEL: @trunc32to16(
+; ALL: switch i16
+; ALL-NEXT: i16 63, label %sw.bb
+; ALL-NEXT: i16 1, label %sw.bb1
+; ALL-NEXT: i16 100, label %sw.bb2
+; ALL-NEXT: ]
+;
+entry:
+ %retval = alloca i32, align 4
+ %xor = xor i32 %a0, 1034460917
+ %shr = lshr i32 %xor, 16
+ %add = add i32 %shr, -917677090
+ switch i32 %add, label %sw.epilog [
+ i32 -917677027, label %sw.bb
+ i32 -917677089, label %sw.bb1
+ i32 -917676990, label %sw.bb2
+ ]
+
+sw.bb: ; preds = %entry
+ store i32 90, i32* %retval, align 4
+ br label %return
+
+sw.bb1: ; preds = %entry
+ store i32 91, i32* %retval, align 4
+ br label %return
+
+sw.bb2: ; preds = %entry
+ store i32 92, i32* %retval, align 4
+ br label %return
+
+sw.epilog: ; preds = %entry
+ store i32 113, i32* %retval, align 4
+ br label %return
+
+return: ; preds = %sw.epilog, %sw.bb2,
+ %rval = load i32, i32* %retval, align 4
+ ret i32 %rval
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=29009
+
+@a = global i32 0, align 4
+@njob = global i32 0, align 4
+
+declare i32 @goo()
+
+; Make sure we do not shrink to illegal types (i3 in this case)
+; if the original type is legal (i32 in this case).
+
+define void @PR29009() {
+; ALL-LABEL: @PR29009(
+; ALL: switch i32
+; ALL-NEXT: i32 0, label
+; ALL-NEXT: i32 3, label
+; ALL-NEXT: ]
+;
+ br label %1
+
+; <label>:1: ; preds = %10, %0
+ %2 = load volatile i32, i32* @njob, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %11
+
+; <label>:4: ; preds = %1
+ %5 = call i32 @goo()
+ %6 = and i32 %5, 7
+ switch i32 %6, label %7 [
+ i32 0, label %8
+ i32 3, label %9
+ ]
+
+; <label>:7: ; preds = %4
+ store i32 6, i32* @a, align 4
+ br label %10
+
+; <label>:8: ; preds = %4
+ store i32 1, i32* @a, align 4
+ br label %10
+
+; <label>:9: ; preds = %4
+ store i32 2, i32* @a, align 4
+ br label %10
+
+; <label>:10: ; preds = %13, %12, %11, %10, %9, %8, %7
+ br label %1
+
+; <label>:11: ; preds = %1
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/narrow.ll b/llvm/test/Transforms/InstCombine/narrow.ll
new file mode 100644
index 00000000000..05e3d8be2f5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/narrow.ll
@@ -0,0 +1,239 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "n8:16:32:64"
+
+; Eliminating the casts in this testcase (by narrowing the AND operation)
+; allows instcombine to realize the function always returns false.
+
+define i1 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 false
+;
+ %C1 = icmp slt i32 %A, %B
+ %ELIM1 = zext i1 %C1 to i32
+ %C2 = icmp sgt i32 %A, %B
+ %ELIM2 = zext i1 %C2 to i32
+ %C3 = and i32 %ELIM1, %ELIM2
+ %ELIM3 = trunc i32 %C3 to i1
+ ret i1 %ELIM3
+}
+
+; The next 6 (3 logic ops * (scalar+vector)) tests show potential cases for narrowing a bitwise logic op.
+
+define i32 @shrink_xor(i64 %a) {
+; CHECK-LABEL: @shrink_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %xor = xor i64 %a, 1
+ %trunc = trunc i64 %xor to i32
+ ret i32 %trunc
+}
+
+; Vectors (with splat constants) should get the same transform.
+
+define <2 x i32> @shrink_xor_vec(<2 x i64> %a) {
+; CHECK-LABEL: @shrink_xor_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
+;
+ %xor = xor <2 x i64> %a, <i64 2, i64 2>
+ %trunc = trunc <2 x i64> %xor to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+; Source and dest types are not in the datalayout.
+
+define i3 @shrink_or(i6 %a) {
+; CHECK-LABEL: @shrink_or(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i6 [[A:%.*]] to i3
+; CHECK-NEXT: [[TRUNC:%.*]] = or i3 [[TMP1]], 1
+; CHECK-NEXT: ret i3 [[TRUNC]]
+;
+ %or = or i6 %a, 33
+ %trunc = trunc i6 %or to i3
+ ret i3 %trunc
+}
+
+; Vectors (with non-splat constants) should get the same transform.
+
+define <2 x i8> @shrink_or_vec(<2 x i16> %a) {
+; CHECK-LABEL: @shrink_or_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[A:%.*]] to <2 x i8>
+; CHECK-NEXT: [[TRUNC:%.*]] = or <2 x i8> [[TMP1]], <i8 -1, i8 0>
+; CHECK-NEXT: ret <2 x i8> [[TRUNC]]
+;
+ %or = or <2 x i16> %a, <i16 -1, i16 256>
+ %trunc = trunc <2 x i16> %or to <2 x i8>
+ ret <2 x i8> %trunc
+}
+
+; We do not narrow from a legal type into a weird (non-legal) type like i31.
+
+define i31 @shrink_and(i64 %a) {
+; CHECK-LABEL: @shrink_and(
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[A:%.*]], 42
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[AND]] to i31
+; CHECK-NEXT: ret i31 [[TRUNC]]
+;
+ %and = and i64 %a, 42
+ %trunc = trunc i64 %and to i31
+ ret i31 %trunc
+}
+
+; Chop the top of the constant(s) if needed.
+
+define <2 x i32> @shrink_and_vec(<2 x i33> %a) {
+; CHECK-LABEL: @shrink_and_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i33> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TRUNC:%.*]] = and <2 x i32> [[TMP1]], <i32 0, i32 6>
+; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
+;
+ %and = and <2 x i33> %a, <i33 4294967296, i33 6>
+ %trunc = trunc <2 x i33> %and to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+; FIXME:
+; This is based on an 'any_of' loop construct.
+; By narrowing the phi and logic op, we simplify away the zext and the final icmp.
+
+define i1 @searchArray1(i32 %needle, i32* %haystack) {
+; CHECK-LABEL: @searchArray1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOUND:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDVAR]] to i64
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[IDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LD]], [[NEEDLE:%.*]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8
+; CHECK-NEXT: [[OR]] = or i8 [[FOUND]], [[ZEXT]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[OR]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %loop ]
+ %found = phi i8 [ 0, %entry ], [ %or, %loop ]
+ %idx = getelementptr i32, i32* %haystack, i32 %indvar
+ %ld = load i32, i32* %idx
+ %cmp1 = icmp eq i32 %ld, %needle
+ %zext = zext i1 %cmp1 to i8
+ %or = or i8 %found, %zext
+ %indvar.next = add i32 %indvar, 1
+ %exitcond = icmp eq i32 %indvar.next, 1000
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ %tobool = icmp ne i8 %or, 0
+ ret i1 %tobool
+}
+
+; FIXME:
+; This is based on an 'all_of' loop construct.
+; By narrowing the phi and logic op, we simplify away the zext and the final icmp.
+
+define i1 @searchArray2(i32 %hay, i32* %haystack) {
+; CHECK-LABEL: @searchArray2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOUND:%.*]] = phi i8 [ 1, [[ENTRY]] ], [ [[AND:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr i32, i32* [[HAYSTACK:%.*]], i64 [[INDVAR]]
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[IDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LD]], [[HAY:%.*]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP1]] to i8
+; CHECK-NEXT: [[AND]] = and i8 [[FOUND]], [[ZEXT]]
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+ %found = phi i8 [ 1, %entry ], [ %and, %loop ]
+ %idx = getelementptr i32, i32* %haystack, i64 %indvar
+ %ld = load i32, i32* %idx
+ %cmp1 = icmp eq i32 %ld, %hay
+ %zext = zext i1 %cmp1 to i8
+ %and = and i8 %found, %zext
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 1000
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ %tobool = icmp ne i8 %and, 0
+ ret i1 %tobool
+}
+
+; FIXME:
+; Narrowing should work with an 'xor' and is not limited to bool types.
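+; Ideally, the phi constants 21 and 33 would shrink to i8 so the xor happens in i8, followed by a single zext of the result.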
+
+define i32 @shrinkLogicAndPhi1(i8 %x, i1 %cond) {
+; CHECK-LABEL: @shrinkLogicAndPhi1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ENDIF:%.*]]
+; CHECK: if:
+; CHECK-NEXT: br label [[ENDIF]]
+; CHECK: endif:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[LOGIC]]
+;
+entry:
+ br i1 %cond, label %if, label %endif
+if:
+ br label %endif
+endif:
+ %phi = phi i32 [ 21, %entry], [ 33, %if ]
+ %zext = zext i8 %x to i32
+ %logic = xor i32 %phi, %zext
+ ret i32 %logic
+}
+
+; FIXME:
+; Narrowing should work with an 'xor' and is not limited to bool types.
+; Test that commuting the xor operands does not inhibit optimization.
+
+define i32 @shrinkLogicAndPhi2(i8 %x, i1 %cond) {
+; CHECK-LABEL: @shrinkLogicAndPhi2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ENDIF:%.*]]
+; CHECK: if:
+; CHECK-NEXT: br label [[ENDIF]]
+; CHECK: endif:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ 33, [[IF]] ]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
+; CHECK-NEXT: [[LOGIC:%.*]] = xor i32 [[PHI]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[LOGIC]]
+;
+entry:
+ br i1 %cond, label %if, label %endif
+if:
+ br label %endif
+endif:
+ %phi = phi i32 [ 21, %entry], [ 33, %if ]
+ %zext = zext i8 %x to i32
+ %logic = xor i32 %zext, %phi
+ ret i32 %logic
+}
+
diff --git a/llvm/test/Transforms/InstCombine/no-negzero.ll b/llvm/test/Transforms/InstCombine/no-negzero.ll
new file mode 100644
index 00000000000..07e68251b5a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/no-negzero.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; ModuleID = '3555a.c'
+; sqrt(fabs) cannot be negative zero, so we should eliminate the fadd.
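+; Adding +0.0 only changes a -0.0 operand (-0.0 + 0.0 == +0.0); sqrt(fabs(x)) is never -0.0, so the fadd is a no-op.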
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.8"
+
+; CHECK-LABEL: @mysqrt(
+; CHECK-NOT: fadd
+; CHECK: ret
+define double @mysqrt(double %x) nounwind {
+entry:
+ %x_addr = alloca double ; <double*> [#uses=2]
+ %retval = alloca double, align 8 ; <double*> [#uses=2]
+ %0 = alloca double, align 8 ; <double*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store double %x, double* %x_addr
+ %1 = load double, double* %x_addr, align 8 ; <double> [#uses=1]
+ %2 = call double @fabs(double %1) nounwind readnone ; <double> [#uses=1]
+ %3 = call double @sqrt(double %2) nounwind readonly ; <double> [#uses=1]
+ %4 = fadd double %3, 0.000000e+00 ; <double> [#uses=1]
+ store double %4, double* %0, align 8
+ %5 = load double, double* %0, align 8 ; <double> [#uses=1]
+ store double %5, double* %retval, align 8
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load double, double* %retval ; <double> [#uses=1]
+ ret double %retval1
+}
+
+declare double @fabs(double)
+
+declare double @sqrt(double) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/no_cgscc_assert.ll b/llvm/test/Transforms/InstCombine/no_cgscc_assert.ll
new file mode 100644
index 00000000000..677066fa2ab
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; PR21403: http://llvm.org/bugs/show_bug.cgi?id=21403
+; When the call to sqrtf is replaced by an intrinsic call to fabs,
+; it should not cause a problem in CGSCC.
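+; With fast-math, sqrtf(f * f) folds to fabs(f), as the CHECK lines below expect.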
+
+define float @bar(float %f) #0 {
+ %mul = fmul fast float %f, %f
+ %call1 = call fast float @sqrtf(float %mul)
+ ret float %call1
+
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: call fast float @llvm.fabs.f32
+; CHECK-NEXT: ret float
+}
+
+declare float @sqrtf(float)
+
diff --git a/llvm/test/Transforms/InstCombine/no_sink_instruction.ll b/llvm/test/Transforms/InstCombine/no_sink_instruction.ll
new file mode 100644
index 00000000000..caeba16fa2e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/no_sink_instruction.ll
@@ -0,0 +1,19 @@
+; RUN: opt -instcombine -instcombine-code-sinking=0 -S < %s | FileCheck %s
+
+define i32 @test(i1 %C, i32 %A, i32 %B) {
+; CHECK-LABEL: @test(
+; CHECK: sdiv i32
+; CHECK-NEXT: add i32
+entry:
+ %tmp.2 = sdiv i32 %A, %B ; <i32> [#uses=1]
+ %tmp.9 = add i32 %B, %A ; <i32> [#uses=1]
+ br i1 %C, label %then, label %endif
+
+then: ; preds = %entry
+; CHECK: ret i32
+ ret i32 %tmp.9
+
+endif: ; preds = %entry
+; CHECK: ret i32
+ ret i32 %tmp.2
+}
diff --git a/llvm/test/Transforms/InstCombine/non-integral-pointers.ll b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll
new file mode 100644
index 00000000000..3b4538985bd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/non-integral-pointers.ll
@@ -0,0 +1,92 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 addrspace(4)* @f_0() {
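+; A GEP of null in non-integral addrspace(4) must remain a getelementptr expression rather than be folded to inttoptr.
+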
+; CHECK-LABEL: @f_0(
+; CHECK: ret i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* null, i64 50)
+ %result = getelementptr i8, i8 addrspace(4)* null, i64 50
+ ret i8 addrspace(4)* %result
+}
+
+define i8 addrspace(3)* @f_1() {
+; inttoptr is fine here since addrspace(3) is integral.
+
+; CHECK-LABEL: @f_1(
+; CHECK: ret i8 addrspace(3)* inttoptr (i64 50 to i8 addrspace(3)*)
+ %result = getelementptr i8, i8 addrspace(3)* null, i64 50
+ ret i8 addrspace(3)* %result
+}
+
+define void @f_2(i8 addrspace(4)** %ptr0, i8 addrspace(4)** %ptr1) {
+; It is not okay to convert the load/store pair to load and store
+; integers, since pointers in address space 4 are non-integral.
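+; (The "ni:4" in the datalayout above marks address space 4 as non-integral.)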
+
+; CHECK-LABEL: @f_2(
+entry:
+; CHECK: %val = load i8 addrspace(4)*, i8 addrspace(4)** %ptr0, align 8
+; CHECK: store i8 addrspace(4)* %val, i8 addrspace(4)** %ptr1, align 8
+; CHECK-NOT: load i64
+; CHECK-NOT: store i64
+ %val = load i8 addrspace(4)*, i8 addrspace(4)** %ptr0
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %ptr1
+ ret void
+}
+
+define void @f_3(i8 addrspace(3)** %ptr0, i8 addrspace(3)** %ptr1) {
+; It *is* okay to convert the load/store pair to load and store
+; integers, since pointers in address space 3 are integral.
+
+; CHECK-LABEL: @f_3(
+entry:
+; CHECK: load i64
+; CHECK: store i64
+ %val = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0
+ store i8 addrspace(3)* %val, i8 addrspace(3)** %ptr1
+ ret void
+}
+
+define i64 @g(i8 addrspace(4)** %gp) {
+ ; CHECK-LABEL: @g(
+ ; CHECK: load
+ %.pre = load i8 addrspace(4)*, i8 addrspace(4)** %gp, align 8
+ %v74 = call i8 addrspace(4)* @alloc()
+ %v75 = addrspacecast i8 addrspace(4)* %v74 to i8*
+ %v76 = bitcast i8* %v75 to i8 addrspace(4)**
+ %v77 = getelementptr i8 addrspace(4)*, i8 addrspace(4)** %v76, i64 -1
+ ; CHECK: store
+ store i8 addrspace(4)* %.pre, i8 addrspace(4)** %v77, align 8
+ %v80 = bitcast i8 addrspace(4)** %v77 to i64*
+ ; CHECK: load
+ ; CHECK-NOT: ptrtoint
+ %v81 = load i64, i64* %v80, align 8
+ ret i64 %v81
+}
+
+define i64 @g2(i8* addrspace(4)* %gp) {
+ ; CHECK-LABEL: @g2(
+ ; CHECK: load
+ %.pre = load i8*, i8* addrspace(4)* %gp, align 8
+ %v74 = call i8 addrspace(4)* @alloc()
+ %v76 = bitcast i8 addrspace(4)* %v74 to i8* addrspace(4)*
+ %v77 = getelementptr i8*, i8* addrspace(4)* %v76, i64 -1
+ ; CHECK: store
+ store i8* %.pre, i8* addrspace(4)* %v77, align 8
+ %v80 = bitcast i8* addrspace(4)* %v77 to i64 addrspace(4)*
+ ; CHECK-NOT: store
+ %v81 = load i64, i64 addrspace(4)* %v80, align 8
+ ret i64 %v81
+}
+
+declare i8 addrspace(4)* @alloc()
+
+define i64 @f_4(i8 addrspace(4)* %v0) {
+ ; CHECK-LABEL: @f_4(
+ ; CHECK-NOT: ptrtoint
+ %v5 = bitcast i64 (i64)* @f_5 to i64 (i8 addrspace(4)*)*
+ %v6 = call i64 %v5(i8 addrspace(4)* %v0)
+ ret i64 %v6
+}
+
+declare i64 @f_5(i64)
diff --git a/llvm/test/Transforms/InstCombine/nonnull-attribute.ll b/llvm/test/Transforms/InstCombine/nonnull-attribute.ll
new file mode 100644
index 00000000000..74fb0911492
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/nonnull-attribute.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that we do not assume that globals in address spaces
+; other than 0 can be null.
+
+@as0 = external global i32
+@as1 = external addrspace(1) global i32
+
+declare void @addrspace0(i32*)
+declare void @addrspace1(i32 addrspace(1)*)
+
+; CHECK: call void @addrspace0(i32* nonnull @as0)
+; CHECK: call void @addrspace1(i32 addrspace(1)* @as1)
+
+define void @test() {
+ call void @addrspace0(i32* @as0)
+ call void @addrspace1(i32 addrspace(1)* @as1)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll
new file mode 100644
index 00000000000..42a910aa3f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/not.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = xor i32 %A, -1
+ %C = xor i32 %B, -1
+ ret i32 %C
+}
+
+define i1 @invert_icmp(i32 %A, i32 %B) {
+; CHECK-LABEL: @invert_icmp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = icmp sle i32 %A, %B
+ %not = xor i1 %cmp, true
+ ret i1 %not
+}
+
+; PR1570
+
+define i1 @invert_fcmp(float %X, float %Y) {
+; CHECK-LABEL: @invert_fcmp(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp olt float %X, %Y
+ %not = xor i1 %cmp, true
+ ret i1 %not
+}
+
+; PR2298
+
+define i1 @not_not_cmp(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_not_cmp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %cmp = icmp slt i32 %nota, %notb
+ ret i1 %cmp
+}
+
+define <2 x i1> @not_not_cmp_vector(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @not_not_cmp_vector(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %nota = xor <2 x i32> %a, <i32 -1, i32 -1>
+ %notb = xor <2 x i32> %b, <i32 -1, i32 -1>
+ %cmp = icmp ugt <2 x i32> %nota, %notb
+ ret <2 x i1> %cmp
+}
+
+define i1 @not_cmp_constant(i32 %a) {
+; CHECK-LABEL: @not_cmp_constant(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %nota = xor i32 %a, -1
+ %cmp = icmp ugt i32 %nota, 42
+ ret i1 %cmp
+}
+
+define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) {
+; CHECK-LABEL: @not_cmp_constant_vector(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -43, i32 -43>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %nota = xor <2 x i32> %a, <i32 -1, i32 -1>
+ %cmp = icmp slt <2 x i32> %nota, <i32 42, i32 42>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[COND:%.*]] = icmp sgt <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[COND]]
+;
+ %cond = icmp sle <2 x i32> %A, %B
+ %Ret = xor <2 x i1> %cond, <i1 true, i1 true>
+ ret <2 x i1> %Ret
+}
+
+define i32 @not_ashr_not(i32 %A, i32 %B) {
+; CHECK-LABEL: @not_ashr_not(
+; CHECK-NEXT: [[NOT2:%.*]] = ashr i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[NOT2]]
+;
+ %not1 = xor i32 %A, -1
+ %ashr = ashr i32 %not1, %B
+ %not2 = xor i32 %ashr, -1
+ ret i32 %not2
+}
+
+define i8 @not_ashr_const(i8 %x) {
+; CHECK-LABEL: @not_ashr_const(
+; CHECK-NEXT: [[NOT:%.*]] = lshr i8 41, [[X:%.*]]
+; CHECK-NEXT: ret i8 [[NOT]]
+;
+ %shr = ashr i8 -42, %x
+ %not = xor i8 %shr, -1
+ ret i8 %not
+}
+
+define <2 x i8> @not_ashr_const_splat(<2 x i8> %x) {
+; CHECK-LABEL: @not_ashr_const_splat(
+; CHECK-NEXT: [[NOT:%.*]] = lshr <2 x i8> <i8 41, i8 41>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[NOT]]
+;
+ %shr = ashr <2 x i8> <i8 -42, i8 -42>, %x
+ %not = xor <2 x i8> %shr, <i8 -1, i8 -1>
+ ret <2 x i8> %not
+}
+
+; We can't get rid of the 'not' on a logical shift of a negative constant.
+
+define i8 @not_lshr_const_negative(i8 %x) {
+; CHECK-LABEL: @not_lshr_const_negative(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 -42, [[X:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SHR]], -1
+; CHECK-NEXT: ret i8 [[NOT]]
+;
+ %shr = lshr i8 -42, %x
+ %not = xor i8 %shr, -1
+ ret i8 %not
+}
+
+define i8 @not_lshr_const(i8 %x) {
+; CHECK-LABEL: @not_lshr_const(
+; CHECK-NEXT: [[NOT:%.*]] = ashr i8 -43, [[X:%.*]]
+; CHECK-NEXT: ret i8 [[NOT]]
+;
+ %shr = lshr i8 42, %x
+ %not = xor i8 %shr, -1
+ ret i8 %not
+}
+
+define <2 x i8> @not_lshr_const_splat(<2 x i8> %x) {
+; CHECK-LABEL: @not_lshr_const_splat(
+; CHECK-NEXT: [[NOT:%.*]] = ashr <2 x i8> <i8 -43, i8 -43>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[NOT]]
+;
+ %shr = lshr <2 x i8> <i8 42, i8 42>, %x
+ %not = xor <2 x i8> %shr, <i8 -1, i8 -1>
+ ret <2 x i8> %not
+}
+
+define i32 @not_sub(i32 %y) {
+; CHECK-LABEL: @not_sub(
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y:%.*]], -124
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %s = sub i32 123, %y
+ %r = xor i32 %s, -1
+ ret i32 %r
+}
+
+define i32 @not_sub_extra_use(i32 %y, i32* %p) {
+; CHECK-LABEL: @not_sub_extra_use(
+; CHECK-NEXT: [[S:%.*]] = sub i32 123, [[Y:%.*]]
+; CHECK-NEXT: store i32 [[S]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], -124
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %s = sub i32 123, %y
+ store i32 %s, i32* %p
+ %r = xor i32 %s, -1
+ ret i32 %r
+}
+
+define <2 x i32> @not_sub_splat(<2 x i32> %y) {
+; CHECK-LABEL: @not_sub_splat(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -124, i32 -124>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %s = sub <2 x i32> <i32 123, i32 123>, %y
+ %r = xor <2 x i32> %s, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @not_sub_extra_use_splat(<2 x i32> %y, <2 x i32>* %p) {
+; CHECK-LABEL: @not_sub_extra_use_splat(
+; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> <i32 123, i32 123>, [[Y:%.*]]
+; CHECK-NEXT: store <2 x i32> [[S]], <2 x i32>* [[P:%.*]], align 8
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], <i32 -124, i32 -124>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %s = sub <2 x i32> <i32 123, i32 123>, %y
+ store <2 x i32> %s, <2 x i32>* %p
+ %r = xor <2 x i32> %s, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @not_sub_vec(<2 x i32> %y) {
+; CHECK-LABEL: @not_sub_vec(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -43, i32 -124>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %s = sub <2 x i32> <i32 42, i32 123>, %y
+ %r = xor <2 x i32> %s, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @not_sub_extra_use_vec(<2 x i32> %y, <2 x i32>* %p) {
+; CHECK-LABEL: @not_sub_extra_use_vec(
+; CHECK-NEXT: [[S:%.*]] = sub <2 x i32> <i32 123, i32 42>, [[Y:%.*]]
+; CHECK-NEXT: store <2 x i32> [[S]], <2 x i32>* [[P:%.*]], align 8
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y]], <i32 -124, i32 -43>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %s = sub <2 x i32> <i32 123, i32 42>, %y
+ store <2 x i32> %s, <2 x i32>* %p
+ %r = xor <2 x i32> %s, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+; ~(X + C) --> -X - C - 1 --> -(C + 1) - X
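+; With C = 123 below, -(C + 1) = -124, the constant in the expected 'sub'.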
+
+define i32 @not_add(i32 %x) {
+; CHECK-LABEL: @not_add(
+; CHECK-NEXT: [[R:%.*]] = sub i32 -124, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add i32 %x, 123
+ %r = xor i32 %a, -1
+ ret i32 %r
+}
+
+define <2 x i32> @not_add_splat(<2 x i32> %x) {
+; CHECK-LABEL: @not_add_splat(
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> <i32 -124, i32 -124>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %a = add <2 x i32> %x, <i32 123, i32 123>
+ %r = xor <2 x i32> %a, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @not_add_vec(<2 x i32> %x) {
+; CHECK-LABEL: @not_add_vec(
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i32> <i32 -43, i32 -124>, [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %a = add <2 x i32> %x, <i32 42, i32 123>
+ %r = xor <2 x i32> %a, <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/nothrow.ll b/llvm/test/Transforms/InstCombine/nothrow.ll
new file mode 100644
index 00000000000..08d90bfbd7d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/nothrow.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | not grep call
+; rdar://6880732
+declare double @t1(i32) readonly
+
+define void @t2() nounwind {
+ call double @t1(i32 42) ;; dead call even though callee is not nothrow.
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll
new file mode 100644
index 00000000000..8cb6421268f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/nsw.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @sub1(i32 %x) {
+; CHECK-LABEL: @sub1(
+; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[Y]], 337
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = sub i32 0, %x
+ %z = sdiv i32 %y, 337
+ ret i32 %z
+}
+
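+; The nsw on the negation makes it safe to fold (0 - x) / 337 into x / -337; compare @sub1 above, where the fold is not done.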
+define i32 @sub2(i32 %x) {
+; CHECK-LABEL: @sub2(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[X:%.*]], -337
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = sub nsw i32 0, %x
+ %z = sdiv i32 %y, 337
+ ret i32 %z
+}
+
+define i1 @shl_icmp(i64 %X) {
+; CHECK-LABEL: @shl_icmp(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i64 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %A = shl nuw i64 %X, 2 ; X/4
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define i64 @shl1(i64 %X, i64* %P) {
+; CHECK-LABEL: @shl1(
+; CHECK-NEXT: [[A:%.*]] = and i64 [[X:%.*]], 312
+; CHECK-NEXT: store i64 [[A]], i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i64 [[A]], 8
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = and i64 %X, 312
+ store i64 %A, i64* %P ; multiple uses of A.
+ %B = shl i64 %A, 8
+ ret i64 %B
+}
+
+define i32 @preserve1(i32 %x) {
+; CHECK-LABEL: @preserve1(
+; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+ %add = add nsw i32 %x, 2
+ %add3 = add nsw i32 %add, 3
+ ret i32 %add3
+}
+
+define i8 @nopreserve1(i8 %x) {
+; CHECK-LABEL: @nopreserve1(
+; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], -126
+; CHECK-NEXT: ret i8 [[ADD3]]
+;
+ %add = add nsw i8 %x, 127
+ %add3 = add nsw i8 %add, 3
+ ret i8 %add3
+}
+
+define i8 @nopreserve2(i8 %x) {
+; CHECK-LABEL: @nopreserve2(
+; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i8 [[ADD3]]
+;
+ %add = add i8 %x, 1
+ %add3 = add nsw i8 %add, 2
+ ret i8 %add3
+}
+
+define i8 @nopreserve3(i8 %A, i8 %B) {
+; CHECK-LABEL: @nopreserve3(
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %x = add i8 %A, 10
+ %y = add i8 %B, 10
+ %add = add nsw i8 %x, %y
+ ret i8 %add
+}
+
+define i8 @nopreserve4(i8 %A, i8 %B) {
+; CHECK-LABEL: @nopreserve4(
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %x = add nsw i8 %A, 10
+ %y = add nsw i8 %B, 10
+ %add = add nsw i8 %x, %y
+ ret i8 %add
+}
+
+; TODO: computeKnownBits() should look through a shufflevector.
+
+define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec(
+; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[T3:%.*]] = shl nsw <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+ %t2 = zext <2 x i8> %x to <2 x i32>
+ %shuf = shufflevector <2 x i32> %t2, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+ %t3 = shl <3 x i32> %shuf, <i32 17, i32 17, i32 17>
+ ret <3 x i32> %t3
+}
+
+; Negative test - if the shuffle mask contains an undef, we bail out to
+; avoid propagating information that may not be used consistently by callers.
+
+define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec(
+; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+ %t2 = zext <2 x i8> %x to <2 x i32>
+ %shuf = shufflevector <2 x i32> %t2, <2 x i32> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+ %t3 = shl <3 x i32> %shuf, <i32 17, i32 17, i32 17>
+ ret <3 x i32> %t3
+}
+
diff --git a/llvm/test/Transforms/InstCombine/obfuscated_splat.ll b/llvm/test/Transforms/InstCombine/obfuscated_splat.ll
new file mode 100644
index 00000000000..c37456cc9cf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
+ %A = load <4 x float>, <4 x float>* %in_ptr, align 16
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
+ %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
+ %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %D, <4 x float> *%out_ptr
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/objsize-64.ll b/llvm/test/Transforms/InstCombine/objsize-64.ll
new file mode 100644
index 00000000000..866bc4f937b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/objsize-64.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare noalias i8* @malloc(i32) nounwind
+declare noalias i8* @_Znwm(i64) ; new(unsigned long)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_call_unexpected(i8*)
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+
+; CHECK-LABEL: @f1(
+define i64 @f1(i8 **%esc) {
+ %call = call i8* @malloc(i32 4)
+ store i8* %call, i8** %esc
+ %size = call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+; CHECK: ret i64 4
+ ret i64 %size
+}
+
+
+; CHECK-LABEL: @f2(
+define i64 @f2(i8** %esc) nounwind uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; CHECK: invoke noalias i8* @_Znwm(i64 13)
+ %call = invoke noalias i8* @_Znwm(i64 13)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; CHECK: ret i64 13
+ store i8* %call, i8** %esc
+ %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false)
+ ret i64 %0
+
+lpad:
+ %1 = landingpad { i8*, i32 }
+ filter [0 x i8*] zeroinitializer
+ %2 = extractvalue { i8*, i32 } %1, 0
+ tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/objsize-address-space.ll b/llvm/test/Transforms/InstCombine/objsize-address-space.ll
new file mode 100644
index 00000000000..ab4b64dfbf0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/objsize-address-space.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)*, i1) nounwind readonly
+declare i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+declare i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)*, i1) nounwind readonly
+
+@array_as2 = private addrspace(2) global [60 x i8] zeroinitializer, align 4
+
+@array_as1_pointers = private global [10 x i32 addrspace(1)*] zeroinitializer, align 4
+@array_as2_pointers = private global [24 x i32 addrspace(2)*] zeroinitializer, align 4
+@array_as3_pointers = private global [42 x i32 addrspace(3)*] zeroinitializer, align 4
+
+@array_as2_as1_pointer_pointers = private global [16 x i32 addrspace(2)* addrspace(1)*] zeroinitializer, align 4
+
+
+@a_as3 = private addrspace(3) global [60 x i8] zeroinitializer, align 1
+
+define i32 @foo_as3() nounwind {
+; CHECK-LABEL: @foo_as3(
+; CHECK-NEXT: ret i32 60
+ %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+ ret i32 %1
+}
+
+define i16 @foo_as3_i16() nounwind {
+; CHECK-LABEL: @foo_as3_i16(
+; CHECK-NEXT: ret i16 60
+ %1 = call i16 @llvm.objectsize.i16.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(3)* @a_as3, i32 0, i32 0), i1 false)
+ ret i16 %1
+}
+
+@a_alias = weak alias [60 x i8], [60 x i8] addrspace(3)* @a_as3
+define i32 @foo_alias() nounwind {
+ %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
+ ret i32 %1
+}
+
+define i32 @array_as2_size() {
+; CHECK-LABEL: @array_as2_size(
+; CHECK-NEXT: ret i32 60
+ %bc = bitcast [60 x i8] addrspace(2)* @array_as2 to i8 addrspace(2)*
+ %1 = call i32 @llvm.objectsize.i32.p2i8(i8 addrspace(2)* %bc, i1 false)
+ ret i32 %1
+}
+
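+; addrspace(1) pointers are 64 bits wide per the datalayout, so 10 of them occupy 80 bytes.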
+define i32 @pointer_array_as1() {
+; CHECK-LABEL: @pointer_array_as1(
+; CHECK-NEXT: ret i32 80
+ %bc = addrspacecast [10 x i32 addrspace(1)*]* @array_as1_pointers to i8 addrspace(1)*
+ %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* %bc, i1 false)
+ ret i32 %1
+}
+
+define i32 @pointer_array_as2() {
+; CHECK-LABEL: @pointer_array_as2(
+; CHECK-NEXT: ret i32 24
+ %bc = bitcast [24 x i32 addrspace(2)*]* @array_as2_pointers to i8*
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+ ret i32 %1
+}
+
+define i32 @pointer_array_as3() {
+; CHECK-LABEL: @pointer_array_as3(
+; CHECK-NEXT: ret i32 84
+ %bc = bitcast [42 x i32 addrspace(3)*]* @array_as3_pointers to i8*
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+ ret i32 %1
+}
+
+define i32 @pointer_pointer_array_as2_as1() {
+; CHECK-LABEL: @pointer_pointer_array_as2_as1(
+; CHECK-NEXT: ret i32 128
+ %bc = bitcast [16 x i32 addrspace(2)* addrspace(1)*]* @array_as2_as1_pointer_pointers to i8*
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false)
+ ret i32 %1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/objsize-noverify.ll b/llvm/test/Transforms/InstCombine/objsize-noverify.ll
new file mode 100644
index 00000000000..7e469bd2528
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/objsize-noverify.ll
@@ -0,0 +1,43 @@
+; Test objectsize bounds checking that won't verify until after -instcombine.
+; RUN: opt < %s -disable-verify -instcombine -S | opt -S | FileCheck %s
+; We need target data to get the sizes of the arrays and structures.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) nounwind readonly
+
+; CHECK-LABEL: @PR13390(
+define i32 @PR13390(i1 %bool, i8* %a) {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+xpto:
+ %select = select i1 %bool, i8* %select, i8* %a
+ %select2 = select i1 %bool, i8* %a, i8* %select2
+ %0 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select, i1 true)
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %select2, i1 true)
+ %2 = add i32 %0, %1
+; CHECK: ret i32 undef
+ ret i32 %2
+
+return:
+ ret i32 42
+}
+
+; CHECK-LABEL: @PR13621(
+define i32 @PR13621(i1 %bool) nounwind {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+; technically reachable, but this malformed IR may appear as a result of constant propagation
+xpto:
+ %gep2 = getelementptr i8, i8* %gep, i32 1
+ %gep = getelementptr i8, i8* %gep2, i32 1
+ %o = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 true)
+; CHECK: ret i32 undef
+ ret i32 %o
+
+return:
+ ret i32 7
+}
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
new file mode 100644
index 00000000000..97c708fb6bf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -0,0 +1,303 @@
+; Test a pile of objectsize bounds checking.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; We need target data to get the sizes of the arrays and structures.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*>
+@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*>
+define i32 @foo() nounwind {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: ret i32 60
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+define i8* @bar() nounwind {
+; CHECK-LABEL: @bar(
+entry:
+ %retval = alloca i8*
+ %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false)
+ %cmp = icmp ne i32 %0, -1
+; CHECK: br i1 true
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ %1 = load i8*, i8** %retval
+ ret i8* %1
+
+cond.false:
+ %2 = load i8*, i8** %retval
+ ret i8* %2
+}
+
+define i32 @f() nounwind {
+; CHECK-LABEL: @f(
+; CHECK-NEXT: ret i32 0
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8], [60 x i8]* @a, i32 1, i32 0), i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+@window = external global [0 x i8]
+
+define i1 @baz() nounwind {
+; CHECK-LABEL: @baz(
+; CHECK-NEXT: objectsize
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 0), i1 false, i1 false, i1 false)
+ %2 = icmp eq i32 %1, -1
+ ret i1 %2
+}
+
+define void @test1(i8* %q, i32 %x) nounwind noinline {
+; CHECK-LABEL: @test1(
+; CHECK: objectsize.i32.p0i8
+entry:
+ %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 10), i1 false, i1 false, i1 false) ; <i64> [#uses=1]
+ %1 = icmp eq i32 %0, -1 ; <i1> [#uses=1]
+ br i1 %1, label %"47", label %"46"
+
+"46": ; preds = %entry
+ unreachable
+
+"47": ; preds = %entry
+ unreachable
+}
+
+@.str5 = private constant [9 x i32] [i32 97, i32 98, i32 99, i32 100, i32 0, i32
+ 101, i32 102, i32 103, i32 0], align 4
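+; @.str5 occupies 9 x i32 = 36 bytes, so the byte offset of 2 below leaves 34.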
+define i32 @test2() nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 34
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8, i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+; rdar://7674946
+@array = internal global [480 x float] zeroinitializer ; <[480 x float]*> [#uses=1]
+
+declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly
+
+declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1, i1, i1) nounwind readonly
+
+declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint
+
+define void @test3() nounwind {
+; CHECK-LABEL: @test3(
+entry:
+ br i1 undef, label %bb11, label %bb12
+
+bb11:
+ %0 = getelementptr inbounds float, float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
+ %1 = bitcast float* %0 to i8* ; <i8*> [#uses=1]
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false, i1 false) ; <i32> [#uses=1]
+ %3 = call i8* @__memcpy_chk(i8* undef, i8* undef, i32 512, i32 %2) nounwind ; <i8*> [#uses=0]
+; CHECK: unreachable
+ unreachable
+
+bb12:
+ %4 = getelementptr inbounds float, float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 128), i32 -127 ; <float*> [#uses=1]
+ %5 = bitcast float* %4 to i8* ; <i8*> [#uses=1]
+ %6 = call i8* @__inline_memcpy_chk(i8* %5, i8* undef, i32 512) nounwind inlinehint ; <i8*> [#uses=0]
+; CHECK: @__inline_memcpy_chk
+ unreachable
+}
+
+; rdar://7718857
+
+%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+define i32 @test4(i8** %esc) nounwind ssp {
+; CHECK-LABEL: @test4(
+entry:
+ %0 = alloca %struct.data, align 8
+ %1 = bitcast %struct.data* %0 to i8*
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false, i1 false) nounwind
+; CHECK-NOT: @llvm.objectsize
+; CHECK: @llvm.memset.p0i8.i32(i8* nonnull align 8 %1, i8 0, i32 1824, i1 false)
+ %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
+ store i8* %1, i8** %esc
+ ret i32 0
+}
+
+; rdar://7782496
+@s = external global i8*
+
+define i8* @test5(i32 %n) nounwind ssp {
+; CHECK-LABEL: @test5(
+entry:
+ %0 = tail call noalias i8* @malloc(i32 20) nounwind
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false, i1 false)
+ %2 = load i8*, i8** @s, align 8
+; CHECK-NOT: @llvm.objectsize
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 %1, i32 10, i1 false)
+ %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
+ ret i8* %0
+}
+
+define void @test6(i32 %n) nounwind ssp {
+; CHECK-LABEL: @test6(
+entry:
+ %0 = tail call noalias i8* @malloc(i32 20) nounwind
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false, i1 false)
+ %2 = load i8*, i8** @s, align 8
+; CHECK-NOT: @llvm.objectsize
+; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
+ %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 30, i32 %1) nounwind
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
+
+declare noalias i8* @malloc(i32) nounwind
+
+define i32 @test7(i8** %esc) {
+; CHECK-LABEL: @test7(
+ %alloc = call noalias i8* @malloc(i32 48) nounwind
+ store i8* %alloc, i8** %esc
+ %gep = getelementptr inbounds i8, i8* %alloc, i32 16
+ %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false, i1 false) nounwind readonly
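+; 48 bytes allocated minus the 16-byte offset leaves 32 bytes.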
+; CHECK: ret i32 32
+ ret i32 %objsize
+}
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define i32 @test8(i8** %esc) {
+; CHECK-LABEL: @test8(
+ %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind
+ store i8* %alloc, i8** %esc
+ %gep = getelementptr inbounds i8, i8* %alloc, i32 5
+ %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false, i1 false) nounwind readonly
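+; calloc(5, 7) allocates 35 bytes; minus the 5-byte offset, 30 bytes remain.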
+; CHECK: ret i32 30
+ ret i32 %objsize
+}
+
+declare noalias i8* @strdup(i8* nocapture) nounwind
+declare noalias i8* @strndup(i8* nocapture, i32) nounwind
+
+; CHECK-LABEL: @test9(
+define i32 @test9(i8** %esc) {
+ %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0)) nounwind
+ store i8* %call, i8** %esc, align 8
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false)
+; CHECK: ret i32 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test10(
+define i32 @test10(i8** %esc) {
+ %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind
+ store i8* %call, i8** %esc, align 8
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false)
+; CHECK: ret i32 4
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test11(
+define i32 @test11(i8** %esc) {
+ %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind
+ store i8* %call, i8** %esc, align 8
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false)
+; CHECK: ret i32 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test12(
+define i32 @test12(i8** %esc) {
+ %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind
+ store i8* %call, i8** %esc, align 8
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false)
+; CHECK: ret i32 8
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test13(
+define i32 @test13(i8** %esc) {
+ %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind
+ store i8* %call, i8** %esc, align 8
+ %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false)
+; CHECK: ret i32 8
+ ret i32 %1
+}
+
+@globalalias = internal alias [60 x i8], [60 x i8]* @a
+
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i32 60
+define i32 @test18() {
+ %bc = bitcast [60 x i8]* @globalalias to i8*
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+@globalalias2 = weak alias [60 x i8], [60 x i8]* @a
+
+; CHECK-LABEL: @test19(
+; CHECK: llvm.objectsize
+define i32 @test19() {
+ %bc = bitcast [60 x i8]* @globalalias2 to i8*
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test20(
+; CHECK: ret i32 0
+define i32 @test20() {
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 false, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test21(
+; CHECK: ret i32 0
+define i32 @test21() {
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 false, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test22(
+; CHECK: llvm.objectsize
+define i32 @test22() {
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 true, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test23(
+; CHECK: llvm.objectsize
+define i32 @test23() {
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 true, i1 false)
+ ret i32 %1
+}
+
+; 1 is an arbitrary non-zero address space.
+; CHECK-LABEL: @test24(
+; CHECK: llvm.objectsize
+define i32 @test24() {
+ %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false,
+ i1 false, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test25(
+; CHECK: llvm.objectsize
+define i32 @test25() {
+ %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true,
+ i1 false, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test26(
+; CHECK: llvm.objectsize
+define i32 @test26() {
+ %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false,
+ i1 true, i1 false)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @test27(
+; CHECK: llvm.objectsize
+define i32 @test27() {
+ %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true,
+ i1 true, i1 false)
+ ret i32 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/odr-linkage.ll b/llvm/test/Transforms/InstCombine/odr-linkage.ll
new file mode 100644
index 00000000000..73675efb08b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/odr-linkage.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | grep "ret i32 10"
+
+@g1 = available_externally constant i32 1
+@g2 = linkonce_odr constant i32 2
+@g3 = weak_odr constant i32 3
+@g4 = internal constant i32 4
+
+define i32 @test() {
+ %A = load i32, i32* @g1
+ %B = load i32, i32* @g2
+ %C = load i32, i32* @g3
+ %D = load i32, i32* @g4
+
+ %a = add i32 %A, %B
+ %b = add i32 %a, %C
+ %c = add i32 %b, %D
+ ret i32 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll
new file mode 100644
index 00000000000..47a4ca4b628
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll
@@ -0,0 +1,111 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;CHECK: @and_consts
+;CHECK: and i32 %k, 12
+;CHECK: icmp ne i32 %0, 12
+;CHECK: ret
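+; (k & 4) == 0 || (k & 8) == 0 folds to one masked compare: (k & 12) != 12.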
+define i1 @and_consts(i32 %k, i32 %c1, i32 %c2) {
+bb:
+ %tmp1 = and i32 4, %k
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp5 = and i32 8, %k
+ %tmp6 = icmp eq i32 %tmp5, 0
+ %or = or i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+;CHECK: @foo1_and
+;CHECK: shl i32 1, %c1
+;CHECK-NEXT: lshr i32 -2147483648, %c2
+;CHECK-NEXT: or i32
+;CHECK-NEXT: and i32
+;CHECK-NEXT: icmp ne i32 %1, %0
+;CHECK: ret
+define i1 @foo1_and(i32 %k, i32 %c1, i32 %c2) {
+bb:
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %tmp, %k
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k
+ %tmp6 = icmp eq i32 %tmp5, 0
+ %or = or i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+; Same as above, but with the operands commuted in one of the ands, but not the other.
+define i1 @foo1_and_commuted(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_and_commuted(
+; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]]
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K2]], [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %k2 = mul i32 %k, %k ; to trick the complexity sorting
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %k2, %tmp
+ %tmp2 = icmp eq i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k2
+ %tmp6 = icmp eq i32 %tmp5, 0
+ %or = or i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+define i1 @or_consts(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @or_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[K:%.*]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 12
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = and i32 4, %k
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 8, %k
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+define i1 @foo1_or(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_or(
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[K:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %tmp, %k
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
+
+; Same as above, but with the operands commuted in one of the ors, but not the other.
+define i1 @foo1_or_commuted(i32 %k, i32 %c1, i32 %c2) {
+; CHECK-LABEL: @foo1_or_commuted(
+; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]]
+; CHECK-NEXT: [[TMP:%.*]] = shl i32 1, [[C1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP]], [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[K2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %k2 = mul i32 %k, %k ; to trick the complexity sorting
+ %tmp = shl i32 1, %c1
+ %tmp4 = lshr i32 -2147483648, %c2
+ %tmp1 = and i32 %k2, %tmp
+ %tmp2 = icmp ne i32 %tmp1, 0
+ %tmp5 = and i32 %tmp4, %k2
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %or = and i1 %tmp2, %tmp6
+ ret i1 %or
+}
diff --git a/llvm/test/Transforms/InstCombine/opaque.ll b/llvm/test/Transforms/InstCombine/opaque.ll
new file mode 100644
index 00000000000..18cbef5281b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/opaque.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -disable-output
+; Checks that bitcasts are not converted into GEP
+; when the size of an aggregate cannot be determined.
+%swift.opaque = type opaque
+%SQ = type <{ [8 x i8] }>
+%Si = type <{ i64 }>
+
+%V = type <{ <{ %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8 }>, %Si, %SQ, %SQ, %Si, %swift.opaque }>
+%Vs4Int8 = type <{ i8 }>
+%swift.type = type { i64 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #8
+
+@_swift_slowAlloc = external global i8* (i64, i64)*
+
+declare i8* @rt_swift_slowAlloc(i64, i64)
+
+define %swift.opaque* @_TwTkV([24 x i8]* %dest, %swift.opaque* %src,
+%swift.type* %bios_boot_params) #0 {
+entry:
+ %0 = bitcast %swift.opaque* %src to %V*
+ %1 = call noalias i8* @rt_swift_slowAlloc(i64 40, i64 0) #11
+ %2 = bitcast [24 x i8]* %dest to i8**
+ store i8* %1, i8** %2, align 8
+ %3 = bitcast i8* %1 to %V*
+ %4 = bitcast %V* %3 to i8*
+ %5 = bitcast %V* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 40, i1 false)
+ %6 = bitcast %V* %3 to %swift.opaque*
+ ret %swift.opaque* %6
+}
diff --git a/llvm/test/Transforms/InstCombine/operand-complexity.ll b/llvm/test/Transforms/InstCombine/operand-complexity.ll
new file mode 100644
index 00000000000..c67fb0803e0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/operand-complexity.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; 'Negate' is considered less complex than a normal binop, so the xor should have the binop as the first operand.
+
+define i8 @neg(i8 %x) {
+; CHECK-LABEL: @neg(
+; CHECK-NEXT: [[BO:%.*]] = udiv i8 [[X:%.*]], 42
+; CHECK-NEXT: [[NEGX:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[BO]], [[NEGX]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %bo = udiv i8 %x, 42
+ %negx = sub i8 0, %x
+ %r = xor i8 %negx, %bo
+ ret i8 %r
+}
+
+define <2 x i8> @neg_vec(<2 x i8> %x) {
+; CHECK-LABEL: @neg_vec(
+; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42>
+; CHECK-NEXT: [[NEGX:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[BO]], [[NEGX]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %bo = udiv <2 x i8> %x, <i8 42, i8 -42>
+ %negx = sub <2 x i8> <i8 0, i8 0>, %x
+ %r = xor <2 x i8> %negx, %bo
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @neg_vec_undef(<2 x i8> %x) {
+; CHECK-LABEL: @neg_vec_undef(
+; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42>
+; CHECK-NEXT: [[NEGX:%.*]] = sub <2 x i8> <i8 0, i8 undef>, [[X]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[BO]], [[NEGX]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %bo = udiv <2 x i8> %x, <i8 42, i8 -42>
+ %negx = sub <2 x i8> <i8 0, i8 undef>, %x
+ %r = xor <2 x i8> %negx, %bo
+ ret <2 x i8> %r
+}
+
+; 'Not' is considered less complex than a normal binop, so the mul should have the binop as the first operand.
+
+define i8 @not(i8 %x) {
+; CHECK-LABEL: @not(
+; CHECK-NEXT: [[BO:%.*]] = udiv i8 [[X:%.*]], 42
+; CHECK-NEXT: [[NOTX:%.*]] = xor i8 [[X]], -1
+; CHECK-NEXT: [[R:%.*]] = mul i8 [[BO]], [[NOTX]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %bo = udiv i8 %x, 42
+ %notx = xor i8 -1, %x
+ %r = mul i8 %notx, %bo
+ ret i8 %r
+}
+
+define <2 x i8> @not_vec(<2 x i8> %x) {
+; CHECK-LABEL: @not_vec(
+; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42>
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %bo = udiv <2 x i8> %x, <i8 42, i8 -42>
+ %notx = xor <2 x i8> <i8 -1, i8 -1>, %x
+ %r = mul <2 x i8> %notx, %bo
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @not_vec_undef(<2 x i8> %x) {
+; CHECK-LABEL: @not_vec_undef(
+; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42>
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 undef>
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %bo = udiv <2 x i8> %x, <i8 42, i8 -42>
+ %notx = xor <2 x i8> <i8 -1, i8 undef>, %x
+ %r = mul <2 x i8> %notx, %bo
+ ret <2 x i8> %r
+}
+
+; 'Fneg' is considered less complex than a normal binop, so the fmul should have the binop as the first operand.
+; Extra uses are required to ensure that the fneg is not canonicalized after the fmul.
+
+declare void @use(float)
+declare void @use_vec(<2 x float>)
+
+define float @fneg(float %x) {
+; CHECK-LABEL: @fneg(
+; CHECK-NEXT: [[BO:%.*]] = fdiv float [[X:%.*]], 4.200000e+01
+; CHECK-NEXT: [[FNEGX:%.*]] = fsub float -0.000000e+00, [[X]]
+; CHECK-NEXT: [[R:%.*]] = fmul float [[BO]], [[FNEGX]]
+; CHECK-NEXT: call void @use(float [[FNEGX]])
+; CHECK-NEXT: ret float [[R]]
+;
+ %bo = fdiv float %x, 42.0
+ %fnegx = fsub float -0.0, %x
+ %r = fmul float %fnegx, %bo
+ call void @use(float %fnegx)
+ ret float %r
+}
+
+define <2 x float> @fneg_vec(<2 x float> %x) {
+; CHECK-LABEL: @fneg_vec(
+; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: [[FNEGX:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X]]
+; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[BO]], [[FNEGX]]
+; CHECK-NEXT: call void @use_vec(<2 x float> [[FNEGX]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %bo = fdiv <2 x float> %x, <float 42.0, float -42.0>
+ %fnegx = fsub <2 x float> <float -0.0, float -0.0>, %x
+ %r = fmul <2 x float> %fnegx, %bo
+ call void @use_vec(<2 x float> %fnegx)
+ ret <2 x float> %r
+}
+
+define <2 x float> @fneg_vec_undef(<2 x float> %x) {
+; CHECK-LABEL: @fneg_vec_undef(
+; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float -4.200000e+01>
+; CHECK-NEXT: [[FNEGX:%.*]] = fsub <2 x float> <float -0.000000e+00, float undef>, [[X]]
+; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[BO]], [[FNEGX]]
+; CHECK-NEXT: call void @use_vec(<2 x float> [[FNEGX]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %bo = fdiv <2 x float> %x, <float 42.0, float -42.0>
+ %fnegx = fsub <2 x float> <float -0.0, float undef>, %x
+ %r = fmul <2 x float> %fnegx, %bo
+ call void @use_vec(<2 x float> %fnegx)
+ ret <2 x float> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll
new file mode 100644
index 00000000000..10ac51ae32b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll
@@ -0,0 +1,1556 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1738(double %x, double %y) {
+; CHECK-LABEL: @PR1738(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp1 = fcmp uno double %x, 0.0
+ %cmp2 = fcmp uno double %y, 0.0
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @PR1738_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[TMP1]]
+;
+ %cmp1 = fcmp uno <2 x double> %x, <double 0.0, double undef>
+ %cmp2 = fcmp uno <2 x double> %y, <double undef, double 0.0>
+ %or = or <2 x i1> %cmp1, %cmp2
+ ret <2 x i1> %or
+}
+
+define i1 @PR41069(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @PR41069(
+; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %uno1 = fcmp uno double %a, %b
+ %uno2 = fcmp uno double %c, 0.0
+ %or = or i1 %uno1, %uno2
+ %uno3 = fcmp uno double %d, 0.0
+ %r = or i1 %or, %uno3
+ ret i1 %r
+}
+
+define i1 @PR41069_commute(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @PR41069_commute(
+; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %uno1 = fcmp uno double %a, %b
+ %uno2 = fcmp uno double %c, 0.0
+ %or = or i1 %uno1, %uno2
+ %uno3 = fcmp uno double %d, 0.0
+ %r = or i1 %uno3, %or
+ ret i1 %r
+}
+
+define <2 x i1> @PR41069_vec(<2 x i1> %z, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @PR41069_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %uno1 = fcmp uno <2 x float> %c, zeroinitializer
+ %or = or <2 x i1> %uno1, %z
+ %uno2 = fcmp uno <2 x float> %d, <float 0.0, float undef>
+ %r = or <2 x i1> %or, %uno2
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @PR41069_vec_commute(<2 x i1> %z, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @PR41069_vec_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %uno1 = fcmp uno <2 x float> %c, zeroinitializer
+ %or = or <2 x i1> %uno1, %z
+ %uno2 = fcmp uno <2 x float> %d, <float 0.0, float undef>
+ %r = or <2 x i1> %uno2, %or
+ ret <2 x i1> %r
+}
+
+define i1 @fcmp_uno_nonzero(float %x, float %y) {
+; CHECK-LABEL: @fcmp_uno_nonzero(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp1 = fcmp uno float %x, 1.0
+ %cmp2 = fcmp uno float %y, 2.0
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define <3 x i1> @fcmp_uno_nonzero_vec(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: @fcmp_uno_nonzero_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <3 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <3 x i1> [[TMP1]]
+;
+ %cmp1 = fcmp uno <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %cmp2 = fcmp uno <3 x float> %y, <float 3.0, float 2.0, float 1.0>
+ %or = or <3 x i1> %cmp1, %cmp2
+ ret <3 x i1> %or
+}
+
+define i1 @auto_gen_0(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_0(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp false double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_1(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_1(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_2(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_3(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_3(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_4(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_4(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_5(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_5(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_6(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_6(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_7(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_7(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_8(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_8(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_9(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_9(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_10(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_10(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_11(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_11(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_12(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_12(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_13(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_13(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_14(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_14(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_15(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_15(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_16(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_16(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_17(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_17(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_18(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_18(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_19(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_19(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_20(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_20(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_21(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_21(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_22(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_22(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_23(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_23(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_24(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_24(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_25(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_25(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_26(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_26(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_27(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_27(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_28(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_28(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_29(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_29(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_30(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_30(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_31(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_31(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_32(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_32(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_33(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_33(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_34(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_34(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_35(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_35(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_36(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_36(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_37(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_37(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_38(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_38(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_39(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_39(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_40(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_40(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_41(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_41(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_42(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_42(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_43(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_43(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_44(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_44(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_45(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_45(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_46(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_46(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_47(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_47(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_48(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_48(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_49(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_49(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_50(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_50(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_51(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_51(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_52(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_52(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_53(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_53(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_54(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_54(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_55(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_55(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_56(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_56(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_57(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_57(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_58(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_58(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_59(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_59(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_60(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_60(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_61(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_61(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_62(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_62(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_63(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_63(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_64(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_64(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_65(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_65(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_66(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_66(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_67(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_67(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_68(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_68(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_69(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_69(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_70(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_70(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_71(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_71(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_72(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_72(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_73(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_73(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_74(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_74(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_75(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_75(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_76(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_76(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_77(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_77(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_78(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_78(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_79(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_79(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_80(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_80(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_81(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_81(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_82(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_82(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_83(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_83(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_84(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_84(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_85(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_85(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_86(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_86(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_87(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_87(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_88(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_88(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_89(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_89(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_90(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_90(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_91(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_91(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_92(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_92(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_93(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_93(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_94(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_94(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_95(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_95(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_96(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_96(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_97(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_97(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_98(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_98(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_99(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_99(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_100(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_100(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_101(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_101(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_102(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_102(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_103(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_103(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_104(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_104(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_105(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_105(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_106(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_106(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_107(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_107(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_108(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_108(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_109(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_109(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_110(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_110(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_111(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_111(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_112(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_112(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_113(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_113(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_114(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_114(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_115(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_115(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_116(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_116(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_117(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_117(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_118(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_118(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_119(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_119(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_120(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_120(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_121(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_121(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_122(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_122(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_123(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_123(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_124(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_124(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_125(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_125(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_126(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_126(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_127(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_127(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_128(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_128(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_129(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_129(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_130(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_130(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_131(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_131(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_132(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_132(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_133(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_133(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_134(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_134(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_135(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_135(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp true double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
diff --git a/llvm/test/Transforms/InstCombine/or-shifted-masks.ll b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
new file mode 100644
index 00000000000..2066f4a10f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-shifted-masks.ll
@@ -0,0 +1,221 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i32 @or_and_shifts1(i32 %x) {
+; CHECK-LABEL: @or_and_shifts1(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = shl i32 %x, 5
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = shl i32 %x, 3
+ %2 = and i32 %1, 15
+ %3 = shl i32 %x, 5
+ %4 = and i32 %3, 60
+ %5 = or i32 %2, %4
+ ret i32 %5
+}
+
+define i32 @or_and_shifts2(i32 %x) {
+; CHECK-LABEL: @or_and_shifts2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 896
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 %x, 4
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 7
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = shl i32 %x, 3
+ %2 = and i32 %1, 896
+ %3 = lshr i32 %x, 4
+ %4 = and i32 %3, 7
+ %5 = or i32 %2, %4
+ ret i32 %5
+}
+
+define i32 @or_and_shift_shift_and(i32 %x) {
+; CHECK-LABEL: @or_and_shift_shift_and(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 56
+; CHECK-NEXT: [[TMP3:%.*]] = shl i32 %x, 2
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 28
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = and i32 %x, 7
+ %2 = shl i32 %1, 3
+ %3 = shl i32 %x, 2
+ %4 = and i32 %3, 28
+ %5 = or i32 %2, %4
+ ret i32 %5
+}
+
+define i32 @multiuse1(i32 %x) {
+; CHECK-LABEL: @multiuse1(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 384
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %1 = and i32 %x, 2
+ %2 = and i32 %x, 4
+ %3 = shl nuw nsw i32 %1, 6
+ %4 = lshr exact i32 %1, 1
+ %5 = shl nuw nsw i32 %2, 6
+ %6 = lshr exact i32 %2, 1
+ %7 = or i32 %3, %5
+ %8 = or i32 %4, %6
+ %9 = or i32 %8, %7
+ ret i32 %9
+}
+
+define i32 @multiuse2(i32 %x) {
+; CHECK-LABEL: @multiuse2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 12
+; CHECK-NEXT: [[TMP3:%.*]] = shl i32 %x, 8
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 24576
+; CHECK-NEXT: [[TMP5:%.*]] = shl i32 %x, 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 7680
+; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = shl i32 %x, 1
+; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 240
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP2]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP7]], [[TMP10]]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %1 = and i32 %x, 6
+ %2 = shl nuw nsw i32 %1, 8
+ %3 = shl nuw nsw i32 %1, 1
+ %4 = and i32 %x, 24
+ %5 = shl nuw nsw i32 %4, 8
+ %6 = shl nuw nsw i32 %4, 1
+ %7 = and i32 %x, 96
+ %8 = shl nuw nsw i32 %7, 8
+ %9 = shl nuw nsw i32 %7, 1
+ %10 = or i32 %2, %5
+ %11 = or i32 %8, %10
+ %12 = or i32 %9, %6
+ %13 = or i32 %3, %12
+ %14 = or i32 %11, %13
+ ret i32 %14
+}
+
+define i32 @multiuse3(i32 %x) {
+; CHECK-LABEL: @multiuse3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 96
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 6
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = shl i32 %x, 6
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 1920
+; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 15
+; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP6]]
+; CHECK-NEXT: ret i32 [[TMP10]]
+;
+ %1 = and i32 %x, 96
+ %2 = shl nuw nsw i32 %1, 6
+ %3 = lshr exact i32 %1, 1
+ %4 = shl i32 %x, 6
+ %5 = and i32 %4, 1920
+ %6 = or i32 %2, %5
+ %7 = lshr i32 %x, 1
+ %8 = and i32 %7, 15
+ %9 = or i32 %3, %8
+ %10 = or i32 %9, %6
+ ret i32 %10
+}
+
+define i32 @multiuse4(i32 %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @multiuse4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 100663296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: br i1 [[TMP2]], label %if, label %else
+; CHECK: {{.*}}if:{{.*}}
+; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i32 [[TMP1]], 22
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 %x, 22
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 480
+; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: br label %end
+; CHECK: {{.*}}else:{{.*}}
+; CHECK-NEXT: [[TMP7:%.*]] = lshr exact i32 [[TMP1]], 17
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 %x, 17
+; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 15360
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP7]]
+; CHECK-NEXT: br label %end
+; CHECK: {{.*}}end{{.*}}
+; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP6]], %if ], [ [[TMP10]], %else ]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %1 = and i32 %x, 100663296
+ %2 = icmp sgt i32 %x, -1
+ br i1 %2, label %if, label %else
+
+if:
+ %3 = lshr exact i32 %1, 22
+ %4 = lshr i32 %x, 22
+ %5 = and i32 %4, 480
+ %6 = or i32 %5, %3
+ br label %end
+
+else:
+ %7 = lshr exact i32 %1, 17
+ %8 = lshr i32 %x, 17
+ %9 = and i32 %8, 15360
+ %10 = or i32 %9, %7
+ br label %end
+
+end:
+ %11 = phi i32 [ %6, %if ], [ %10, %else ]
+ ret i32 %11
+}
+
+define i32 @multiuse5(i32 %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @multiuse5(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: br i1 [[TMP2]], label %if, label %else
+; CHECK: {{.*}}if:{{.*}}
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], 21760
+; CHECK-NEXT: [[TMP4:%.*]] = shl i32 %x, 5
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 43520
+; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: br label %end
+; CHECK: {{.*}}else:{{.*}}
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 5570560
+; CHECK-NEXT: [[TMP8:%.*]] = shl i32 %x, 5
+; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 11141120
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP7]]
+; CHECK-NEXT: br label %end
+; CHECK: {{.*}}end{{.*}}
+; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP6]], %if ], [ [[TMP10]], %else ]
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %1 = shl i32 %x, 5
+ %2 = icmp sgt i32 %x, -1
+ br i1 %2, label %if, label %else
+
+if:
+ %3 = and i32 %1, 21760
+ %4 = and i32 %x, 1360
+ %5 = shl nuw nsw i32 %4, 5
+ %6 = or i32 %5, %3
+ br label %end
+
+else:
+ %7 = and i32 %1, 5570560
+ %8 = and i32 %x, 348160
+ %9 = shl nuw nsw i32 %8, 5
+ %10 = or i32 %9, %7
+ br label %end
+
+end:
+ %11 = phi i32 [ %6, %if ], [ %10, %else ]
+ ret i32 %11
+}
+
diff --git a/llvm/test/Transforms/InstCombine/or-xor.ll b/llvm/test/Transforms/InstCombine/or-xor.ll
new file mode 100644
index 00000000000..ab5f2f8ef53
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-xor.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; X | ~(X | Y) --> X | ~Y
+
+define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %or = or i32 %x, %y
+ %not = xor i32 %or, -1
+ %z = or i32 %x, %not
+ ret i32 %z
+}
+
+; Commute (rename) the inner 'or' operands:
+; Y | ~(X | Y) --> ~X | Y
+
+define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %or = or i32 %x, %y
+ %not = xor i32 %or, -1
+ %z = or i32 %y, %not
+ ret i32 %z
+}
+
+; X | ~(X ^ Y) --> X | ~Y
+
+define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %xor = xor i32 %x, %y
+ %not = xor i32 %xor, -1
+ %z = or i32 %x, %not
+ ret i32 %z
+}
+
+; Commute (rename) the 'xor' operands:
+; Y | ~(X ^ Y) --> ~X | Y
+
+define i32 @test4(i32 %x, i32 %y) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %xor = xor i32 %x, %y
+ %not = xor i32 %xor, -1
+ %z = or i32 %y, %not
+ ret i32 %z
+}
+
+define i32 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i32 -1
+;
+ %and = and i32 %x, %y
+ %not = xor i32 %and, -1
+ %z = or i32 %x, %not
+ ret i32 %z
+}
+
+define i32 @test6(i32 %x, i32 %y) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i32 -1
+;
+ %and = and i32 %x, %y
+ %not = xor i32 %and, -1
+ %z = or i32 %y, %not
+ ret i32 %z
+}
+
+define i32 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %xor = xor i32 %x, %y
+ %z = or i32 %y, %xor
+ ret i32 %z
+}
+
+define i32 @test8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %not = xor i32 %y, -1
+ %xor = xor i32 %x, %not
+ %z = or i32 %y, %xor
+ ret i32 %z
+}
+
+define i32 @test9(i32 %x, i32 %y) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %not = xor i32 %x, -1
+ %xor = xor i32 %not, %y
+ %z = or i32 %x, %xor
+ ret i32 %z
+}
+
+define i32 @test10(i32 %A, i32 %B) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 -1
+;
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %or = or i32 %xor1, %xor2
+ ret i32 %or
+}
+
+define i32 @test10_commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @test10_commuted(
+; CHECK-NEXT: ret i32 -1
+;
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %or = or i32 %xor2, %xor1
+ ret i32 %or
+}
+
+; (x | y) & ((~x) ^ y) -> (x & y)
+define i32 @test11(i32 %x, i32 %y) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %or = or i32 %x, %y
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %and = and i32 %or, %xor
+ ret i32 %and
+}
+
+; ((~x) ^ y) & (x | y) -> (x & y)
+define i32 @test12(i32 %x, i32 %y) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %or = or i32 %x, %y
+ %and = and i32 %xor, %or
+ ret i32 %and
+}
+
+define i32 @test12_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test12_commuted(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %or = or i32 %y, %x
+ %and = and i32 %xor, %or
+ ret i32 %and
+}
+
+; ((x | y) ^ (x ^ y)) -> (x & y)
+define i32 @test13(i32 %x, i32 %y) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = xor i32 %y, %x
+ %2 = or i32 %y, %x
+ %3 = xor i32 %2, %1
+ ret i32 %3
+}
+
+; ((x | ~y) ^ (~x | y)) -> x ^ y
+define i32 @test14(i32 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %noty
+ %or2 = or i32 %notx, %y
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+define i32 @test14_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test14_commuted(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %noty, %x
+ %or2 = or i32 %notx, %y
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((x & ~y) ^ (~x & y)) -> x ^ y
+define i32 @test15(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %and1 = and i32 %x, %noty
+ %and2 = and i32 %notx, %y
+ %xor = xor i32 %and1, %and2
+ ret i32 %xor
+}
+
+define i32 @test15_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15_commuted(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %and1 = and i32 %noty, %x
+ %and2 = and i32 %notx, %y
+ %xor = xor i32 %and1, %and2
+ ret i32 %xor
+}
+
+define i32 @test16(i32 %a, i32 %b) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %or = xor i32 %a, %b
+ %and1 = and i32 %or, 1
+ %and2 = and i32 %b, -2
+ %xor = or i32 %and1, %and2
+ ret i32 %xor
+}
+
+define i8 @not_or(i8 %x) {
+; CHECK-LABEL: @not_or(
+; CHECK-NEXT: [[NOTX:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[OR:%.*]] = xor i8 [[NOTX]], -8
+; CHECK-NEXT: ret i8 [[OR]]
+;
+ %notx = xor i8 %x, -1
+ %or = or i8 %notx, 7
+ ret i8 %or
+}
+
+define i8 @not_or_xor(i8 %x) {
+; CHECK-LABEL: @not_or_xor(
+; CHECK-NEXT: [[NOTX:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[NOTX]], -12
+; CHECK-NEXT: ret i8 [[XOR]]
+;
+ %notx = xor i8 %x, -1
+ %or = or i8 %notx, 7
+ %xor = xor i8 %or, 12
+ ret i8 %xor
+}
+
+define i8 @xor_or(i8 %x) {
+; CHECK-LABEL: @xor_or(
+; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[OR:%.*]] = xor i8 [[XOR]], 32
+; CHECK-NEXT: ret i8 [[OR]]
+;
+ %xor = xor i8 %x, 32
+ %or = or i8 %xor, 7
+ ret i8 %or
+}
+
+define i8 @xor_or2(i8 %x) {
+; CHECK-LABEL: @xor_or2(
+; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[OR:%.*]] = xor i8 [[XOR]], 32
+; CHECK-NEXT: ret i8 [[OR]]
+;
+ %xor = xor i8 %x, 33
+ %or = or i8 %xor, 7
+ ret i8 %or
+}
+
+define i8 @xor_or_xor(i8 %x) {
+; CHECK-LABEL: @xor_or_xor(
+; CHECK-NEXT: [[XOR1:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[XOR1]], 44
+; CHECK-NEXT: ret i8 [[XOR2]]
+;
+ %xor1 = xor i8 %x, 33
+ %or = or i8 %xor1, 7
+ %xor2 = xor i8 %or, 12
+ ret i8 %xor2
+}
+
+define i8 @or_xor_or(i8 %x) {
+; CHECK-LABEL: @or_xor_or(
+; CHECK-NEXT: [[XOR:%.*]] = or i8 [[X:%.*]], 39
+; CHECK-NEXT: [[OR2:%.*]] = xor i8 [[XOR]], 8
+; CHECK-NEXT: ret i8 [[OR2]]
+;
+ %or1 = or i8 %x, 33
+ %xor = xor i8 %or1, 12
+ %or2 = or i8 %xor, 7
+ ret i8 %or2
+}
+
+define i8 @test17(i8 %A, i8 %B) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], 33
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %or = or i8 %xor1, %xor2
+ %res = mul i8 %or, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
+
+define i8 @test18(i8 %A, i8 %B) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], 33
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %or = or i8 %xor2, %xor1
+ %res = mul i8 %or, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
+
+; ((x | y) ^ (~x | ~y)) -> ~(x ^ y)
+define i32 @test19(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %y
+ %or2 = or i32 %notx, %noty
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((x | y) ^ (~y | ~x)) -> ~(x ^ y)
+define i32 @test20(i32 %x, i32 %y) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %x, %y
+ %or2 = or i32 %noty, %notx
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((~x | ~y) ^ (x | y)) -> ~(x ^ y)
+define i32 @test21(i32 %x, i32 %y) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %notx, %noty
+ %or2 = or i32 %x, %y
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
+
+; ((~x | ~y) ^ (y | x)) -> ~(x ^ y)
+define i32 @test22(i32 %x, i32 %y) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %noty = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %or1 = or i32 %notx, %noty
+ %or2 = or i32 %y, %x
+ %xor = xor i32 %or1, %or2
+ ret i32 %xor
+}
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
new file mode 100644
index 00000000000..d3f3d531115
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -0,0 +1,843 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define i32 @test12(i32 %A) {
+ ; Should be eliminated
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[C:%.*]] = and i32 [[A:%.*]], 8
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = or i32 %A, 4
+ %C = and i32 %B, 8
+ ret i32 %C
+}
+
+define i32 @test13(i32 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i32 8
+;
+ %B = or i32 %A, 12
+ ; Always equal to 8
+ %C = and i32 %B, 8
+ ret i32 %C
+}
+
+define i1 @test14(i32 %A, i32 %B) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ugt i32 %A, %B
+ ; (A < B) | (A > B) === A != B
+ %D = or i1 %C1, %C2
+ ret i1 %D
+}
+
+define i1 @test15(i32 %A, i32 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp eq i32 %A, %B
+ ; (A < B) | (A == B) === A <= B
+ %D = or i1 %C1, %C2
+ ret i1 %D
+}
+
+define i32 @test16(i32 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = and i32 %A, 1
+ ; -2 = ~1
+ %C = and i32 %A, -2
+  ; %D = or i32 %B, %C == %A
+ %D = or i32 %B, %C
+ ret i32 %D
+}
+
+define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], 5
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = and i32 %A, 1
+ %C = and i32 %A, 4
+  ; %D = and i32 %A, 5
+ %D = or i32 %B, %C
+ ret i32 %D
+}
+
+define i1 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A_OFF]], 49
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sge i32 %A, 100
+ %C = icmp slt i32 %A, 50
+ %D = or i1 %B, %C
+ ret i1 %D
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test18vec(<2 x i32> %A) {
+; CHECK-LABEL: @test18vec(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 99, i32 99>
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A]], <i32 50, i32 50>
+; CHECK-NEXT: [[D:%.*]] = or <2 x i1> [[B]], [[C]]
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %B = icmp sge <2 x i32> %A, <i32 100, i32 100>
+ %C = icmp slt <2 x i32> %A, <i32 50, i32 50>
+ %D = or <2 x i1> %B, %C
+ ret <2 x i1> %D
+}
+
+define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %y = and i32 %x, 123
+ %z = or i32 %y, %x
+ ret i32 %z
+}
+
+define i32 @test21(i32 %tmp.1) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP_1_MASK1:%.*]] = add i32 [[TMP_1:%.*]], 2
+; CHECK-NEXT: ret i32 [[TMP_1_MASK1]]
+;
+ %tmp.1.mask1 = add i32 %tmp.1, 2
+ %tmp.3 = and i32 %tmp.1.mask1, -2
+ %tmp.5 = and i32 %tmp.1, 1
+  ;; %tmp.6 == add i32 %tmp.1, 2
+ %tmp.6 = or i32 %tmp.5, %tmp.3
+ ret i32 %tmp.6
+}
+
+define i32 @test22(i32 %B) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: ret i32 [[B:%.*]]
+;
+ %ELIM41 = and i32 %B, 1
+ %ELIM7 = and i32 %B, -2
+ %ELIM5 = or i32 %ELIM41, %ELIM7
+ ret i32 %ELIM5
+}
+
+define i16 @test23(i16 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[B:%.*]] = lshr i16 [[A:%.*]], 1
+; CHECK-NEXT: [[D:%.*]] = xor i16 [[B]], -24575
+; CHECK-NEXT: ret i16 [[D]]
+;
+ %B = lshr i16 %A, 1
+ ;; fold or into xor
+ %C = or i16 %B, -32768
+ %D = xor i16 %C, 8193
+ ret i16 %D
+}
+
+define <2 x i16> @test23vec(<2 x i16> %A) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[B:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 1, i16 1>
+; CHECK-NEXT: [[D:%.*]] = xor <2 x i16> [[B]], <i16 -24575, i16 -24575>
+; CHECK-NEXT: ret <2 x i16> [[D]]
+;
+ %B = lshr <2 x i16> %A, <i16 1, i16 1>
+ ;; fold or into xor
+ %C = or <2 x i16> %B, <i16 -32768, i16 -32768>
+ %D = xor <2 x i16> %C, <i16 8193, i16 8193>
+ ret <2 x i16> %D
+}
+
+; PR3266 & PR5276
+define i1 @test25(i32 %A, i32 %B) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[B:%.*]], 57
+; CHECK-NEXT: [[F:%.*]] = and i1 [[D]], [[C]]
+; CHECK-NEXT: ret i1 [[F]]
+;
+ %C = icmp eq i32 %A, 0
+ %D = icmp eq i32 %B, 57
+ %E = or i1 %C, %D
+ %F = xor i1 %E, -1
+ ret i1 %F
+}
+
+; PR5634
+define i1 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %C1 = icmp eq i32 %A, 0
+ %C2 = icmp eq i32 %B, 0
+  ; (A == 0) & (B == 0) --> (A|B) == 0
+ %D = and i1 %C1, %C2
+ ret i1 %D
+}
+
+define i1 @test27(i32* %A, i32* %B) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* [[A:%.*]], null
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32* [[B:%.*]], null
+; CHECK-NEXT: [[E:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[E]]
+;
+ %C1 = ptrtoint i32* %A to i32
+ %C2 = ptrtoint i32* %B to i32
+ %D = or i32 %C1, %C2
+ %E = icmp eq i32 %D, 0
+ ret i1 %E
+}
+
+define <2 x i1> @test27vec(<2 x i32*> %A, <2 x i32*> %B) {
+; CHECK-LABEL: @test27vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32*> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32*> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[E:%.*]] = and <2 x i1> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i1> [[E]]
+;
+ %C1 = ptrtoint <2 x i32*> %A to <2 x i32>
+ %C2 = ptrtoint <2 x i32*> %B to <2 x i32>
+ %D = or <2 x i32> %C1, %C2
+ %E = icmp eq <2 x i32> %D, zeroinitializer
+ ret <2 x i1> %E
+}
+
+; PR5634
+define i1 @test28(i32 %A, i32 %B) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %C1 = icmp ne i32 %A, 0
+ %C2 = icmp ne i32 %B, 0
+  ; (A != 0) | (B != 0) --> (A|B) != 0
+ %D = or i1 %C1, %C2
+ ret i1 %D
+}
+
+define i1 @test29(i32* %A, i32* %B) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32* [[A:%.*]], null
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32* [[B:%.*]], null
+; CHECK-NEXT: [[E:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[E]]
+;
+ %C1 = ptrtoint i32* %A to i32
+ %C2 = ptrtoint i32* %B to i32
+ %D = or i32 %C1, %C2
+ %E = icmp ne i32 %D, 0
+ ret i1 %E
+}
+
+define <2 x i1> @test29vec(<2 x i32*> %A, <2 x i32*> %B) {
+; CHECK-LABEL: @test29vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32*> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32*> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[E:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i1> [[E]]
+;
+ %C1 = ptrtoint <2 x i32*> %A to <2 x i32>
+ %C2 = ptrtoint <2 x i32*> %B to <2 x i32>
+ %D = or <2 x i32> %C1, %C2
+ %E = icmp ne <2 x i32> %D, zeroinitializer
+ ret <2 x i1> %E
+}
+
+; PR4216
+define i32 @test30(i32 %A) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[D:%.*]] = and i32 [[A:%.*]], -58312
+; CHECK-NEXT: [[E:%.*]] = or i32 [[D]], 32962
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %B = or i32 %A, 32962
+ %C = and i32 %A, -65536
+ %D = and i32 %B, 40186
+ %E = or i32 %D, %C
+ ret i32 %E
+}
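+
+; A rough sketch of the folded constants above (hex): %B = %A | 0x80C2 and
+; %D = %B & 0x9CFA, and since 0x80C2 is a subset of 0x9CFA this is
+; (%A & 0x1C38) | 0x80C2. Or-ing in %C = %A & 0xFFFF0000 widens the mask to
+; 0xFFFF1C38 (-58312), leaving (%A & -58312) | 32962 as in the CHECK lines.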
+
+define <2 x i32> @test30vec(<2 x i32> %A) {
+; CHECK-LABEL: @test30vec(
+; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], <i32 -65536, i32 -65536>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], <i32 7224, i32 7224>
+; CHECK-NEXT: [[D:%.*]] = or <2 x i32> [[B]], <i32 32962, i32 32962>
+; CHECK-NEXT: [[E:%.*]] = or <2 x i32> [[D]], [[C]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %B = or <2 x i32> %A, <i32 32962, i32 32962>
+ %C = and <2 x i32> %A, <i32 -65536, i32 -65536>
+ %D = and <2 x i32> %B, <i32 40186, i32 40186>
+ %E = or <2 x i32> %D, %C
+ ret <2 x i32> %E
+}
+
+; PR4216
+define i64 @test31(i64 %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[E:%.*]] = and i64 [[A:%.*]], 4294908984
+; CHECK-NEXT: [[F:%.*]] = or i64 [[E]], 32962
+; CHECK-NEXT: ret i64 [[F]]
+;
+ %B = or i64 %A, 194
+ %D = and i64 %B, 250
+
+ %C = or i64 %A, 32768
+ %E = and i64 %C, 4294941696
+
+ %F = or i64 %D, %E
+ ret i64 %F
+}
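+
+; Same kind of arithmetic as @test30 (hex): %D = (%A | 0xC2) & 0xFA
+; = (%A & 0x38) | 0xC2, and %E = (%A | 0x8000) & 0xFFFF9C00
+; = (%A & 0xFFFF1C00) | 0x8000, so %F = (%A & 0xFFFF1C38) | 0x80C2,
+; i.e. (%A & 4294908984) | 32962, matching the CHECK lines.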
+
+define <2 x i64> @test31vec(<2 x i64> %A) {
+; CHECK-LABEL: @test31vec(
+; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], <i64 4294908984, i64 4294908984>
+; CHECK-NEXT: [[F:%.*]] = or <2 x i64> [[E]], <i64 32962, i64 32962>
+; CHECK-NEXT: ret <2 x i64> [[F]]
+;
+ %B = or <2 x i64> %A, <i64 194, i64 194>
+ %D = and <2 x i64> %B, <i64 250, i64 250>
+
+ %C = or <2 x i64> %A, <i64 32768, i64 32768>
+ %E = and <2 x i64> %C, <i64 4294941696, i64 4294941696>
+
+ %F = or <2 x i64> %D, %E
+ ret <2 x i64> %F
+}
+
+; codegen is mature enough to handle vector selects.
+define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[AND_I1352:%.*]], <4 x i32> [[VECINIT6_I176:%.*]], <4 x i32> [[VECINIT6_I191:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %and.i135 = sext <4 x i1> %and.i1352 to <4 x i32>
+ %and.i129 = and <4 x i32> %vecinit6.i176, %and.i135
+ %neg.i = xor <4 x i32> %and.i135, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and.i = and <4 x i32> %vecinit6.i191, %neg.i
+ %or.i = or <4 x i32> %and.i, %and.i129
+ ret <4 x i32> %or.i
+}
+
+define i1 @test33(i1 %X, i1 %Y) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[B:%.*]] = or i1 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %a = or i1 %X, %Y
+ %b = or i1 %a, %X
+ ret i1 %b
+}
+
+define i32 @test34(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[B:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %a = or i32 %X, %Y
+ %b = or i32 %Y, %a
+ ret i32 %b
+}
+
+define i32 @test35(i32 %a, i32 %b) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1135
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %1 = or i32 %a, 1135
+ %2 = or i32 %1, %b
+ ret i32 %2
+}
+
+define i1 @test36(i32 %x) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -23
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp eq i32 %x, 23
+ %cmp2 = icmp eq i32 %x, 24
+ %ret1 = or i1 %cmp1, %cmp2
+ %cmp3 = icmp eq i32 %x, 25
+ %ret2 = or i1 %ret1, %cmp3
+ ret i1 %ret2
+}
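+
+; A quick way to see the fold above: %x is 23, 24 or 25 exactly when
+; %x - 23 is 0, 1 or 2, i.e. when (%x + -23) is unsigned-less-than 3,
+; which is the add/icmp ult pair in the CHECK lines.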
+
+define i32 @orsext_to_sel(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel(
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[Y:%.*]], i32 -1, i32 [[X:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %sext = sext i1 %y to i32
+ %or = or i32 %sext, %x
+ ret i32 %or
+}
+
+define i32 @orsext_to_sel_swap(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel_swap(
+; CHECK-NEXT: [[OR:%.*]] = select i1 [[Y:%.*]], i32 -1, i32 [[X:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %sext = sext i1 %y to i32
+ %or = or i32 %x, %sext
+ ret i32 %or
+}
+
+define i32 @orsext_to_sel_multi_use(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel_multi_use(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEXT]], [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[OR]], [[SEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %sext = sext i1 %y to i32
+ %or = or i32 %sext, %x
+ %add = add i32 %sext, %or
+ ret i32 %add
+}
+
+define <2 x i32> @orsext_to_sel_vec(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @orsext_to_sel_vec(
+; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %sext = sext <2 x i1> %y to <2 x i32>
+ %or = or <2 x i32> %sext, %x
+ ret <2 x i32> %or
+}
+
+define <2 x i132> @orsext_to_sel_vec_swap(<2 x i132> %x, <2 x i1> %y) {
+; CHECK-LABEL: @orsext_to_sel_vec_swap(
+; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i132> <i132 -1, i132 -1>, <2 x i132> [[X:%.*]]
+; CHECK-NEXT: ret <2 x i132> [[OR]]
+;
+ %sext = sext <2 x i1> %y to <2 x i132>
+ %or = or <2 x i132> %x, %sext
+ ret <2 x i132> %or
+}
+
+; (~A & B) | A --> A | B
+
+define i32 @test39a(i32 %a, float %b) {
+; CHECK-LABEL: @test39a(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 [[A:%.*]], 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float [[B:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %nota, %b1
+ %or = or i32 %and, %a1
+ ret i32 %or
+}
+
+; Commute 'and' operands:
+; (B & ~A) | A --> A | B
+
+define i32 @test39b(i32 %a, float %b) {
+; CHECK-LABEL: @test39b(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 [[A:%.*]], 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float [[B:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %b1, %nota
+ %or = or i32 %and, %a1
+ ret i32 %or
+}
+
+; Commute 'or' operands:
+; A | (~A & B) --> A | B
+
+define i32 @test39c(i32 %a, float %b) {
+; CHECK-LABEL: @test39c(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 [[A:%.*]], 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float [[B:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %nota, %b1
+ %or = or i32 %a1, %and
+ ret i32 %or
+}
+
+; Commute 'and' operands:
+; A | (B & ~A) --> A | B
+
+define i32 @test39d(i32 %a, float %b) {
+; CHECK-LABEL: @test39d(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 [[A:%.*]], 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float [[B:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %b1, %nota
+ %or = or i32 %a1, %and
+ ret i32 %or
+}
+
+define i32 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[XOR]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test40b(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40b(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[XOR]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %b, %a
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test40c(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40c(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[XOR]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %b, %a
+ %xor = xor i32 %a, -1
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test40d(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40d(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[XOR]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test45(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[OR1]]
+;
+ %or = or i32 %y, %z
+ %and = and i32 %x, %or
+ %or1 = or i32 %and, %y
+ ret i32 %or1
+}
+
+define i1 @test46(i8 signext %c) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[C:%.*]], -33
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 26
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %c.off = add i8 %c, -97
+ %cmp1 = icmp ult i8 %c.off, 26
+ %c.off17 = add i8 %c, -65
+ %cmp2 = icmp ult i8 %c.off17, 26
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+define i1 @test47(i8 signext %c) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[C:%.*]], -33
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 27
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
+ %c.off = add i8 %c, -65
+ %cmp1 = icmp ule i8 %c.off, 26
+ %c.off17 = add i8 %c, -97
+ %cmp2 = icmp ule i8 %c.off17, 26
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
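+
+; Rough reasoning for the two tests above: [97,123) is 'a'..'z' and [65,91) is
+; 'A'..'Z'; the two ranges differ only in bit 5 (0x20), so clearing that bit
+; with "and i8 %c, -33" maps both onto the same range and a single add/icmp ult
+; covers them. With the ule 26 form in @test47 the merged range check is ult 27.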
+
+define i32 @test49(i1 %C) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 1019, i32 123
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = or i32 %A, 123
+ ret i32 %V
+}
+
+define <2 x i32> @test49vec(i1 %C) {
+; CHECK-LABEL: @test49vec(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1019, i32 1019>, <2 x i32> <i32 123, i32 123>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = or <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %V
+}
+
+define <2 x i32> @test49vec2(i1 %C) {
+; CHECK-LABEL: @test49vec2(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1019, i32 2509>, <2 x i32> <i32 123, i32 351>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+ %V = or <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %V
+}
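+
+; The or constant simply distributes into both select arms above:
+; 1000 | 123 = 1019, 10 | 123 = 123, 2500 | 333 = 2509 and 30 | 333 = 351.
+; The same folded constants reappear in the phi-based @test50 variants below.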
+
+define i32 @test50(i1 %which) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1019, [[ENTRY:%.*]] ], [ 123, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = or i32 %A, 123
+ ret i32 %value
+}
+
+define <2 x i32> @test50vec(i1 %which) {
+; CHECK-LABEL: @test50vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1019, i32 1019>, [[ENTRY:%.*]] ], [ <i32 123, i32 123>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = or <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test50vec2(i1 %which) {
+; CHECK-LABEL: @test50vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 1019, i32 2509>, [[ENTRY:%.*]] ], [ <i32 123, i32 351>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = or <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %value
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (X | (Y & ~X)) -> (X | Y), where 'not' is an inverted cmp
+
+define i1 @or_andn_cmp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @or_andn_cmp_1(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp sgt i32 %a, %b
+ %x_inv = icmp sle i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x_inv
+ %or = or i1 %x, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((Y & ~X) | X) -> (X | Y), where 'not' is an inverted cmp
+
+define <2 x i1> @or_andn_cmp_2(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_2(
+; CHECK-NEXT: [[X:%.*]] = icmp sge <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <2 x i32> [[C:%.*]], <i32 42, i32 47>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[OR]]
+;
+ %x = icmp sge <2 x i32> %a, %b
+ %x_inv = icmp slt <2 x i32> %a, %b
+ %y = icmp ugt <2 x i32> %c, <i32 42, i32 47> ; thwart complexity-based ordering
+ %and = and <2 x i1> %y, %x_inv
+ %or = or <2 x i1> %and, %x
+ ret <2 x i1> %or
+}
+
+; Commute the 'and':
+; (X | (~X & Y)) -> (X | Y), where 'not' is an inverted cmp
+
+define i1 @or_andn_cmp_3(i72 %a, i72 %b, i72 %c) {
+; CHECK-LABEL: @or_andn_cmp_3(
+; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp ugt i72 %a, %b
+ %x_inv = icmp ule i72 %a, %b
+ %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %x_inv, %y
+ %or = or i1 %x, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((~X & Y) | X) -> (X | Y), where 'not' is an inverted cmp
+
+define <3 x i1> @or_andn_cmp_4(<3 x i32> %a, <3 x i32> %b, <3 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_4(
+; CHECK-NEXT: [[X:%.*]] = icmp eq <3 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <3 x i32> [[C:%.*]], <i32 42, i32 43, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <3 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[OR]]
+;
+ %x = icmp eq <3 x i32> %a, %b
+ %x_inv = icmp ne <3 x i32> %a, %b
+ %y = icmp ugt <3 x i32> %c, <i32 42, i32 43, i32 -1> ; thwart complexity-based ordering
+ %and = and <3 x i1> %x_inv, %y
+ %or = or <3 x i1> %and, %x
+ ret <3 x i1> %or
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (~X | (Y & X)) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_1(i37 %a, i37 %b, i37 %c) {
+; CHECK-LABEL: @orn_and_cmp_1(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X_INV]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp sgt i37 %a, %b
+ %x_inv = icmp sle i37 %a, %b
+ %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x
+ %or = or i1 %x_inv, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((Y & X) | ~X) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_2(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: @orn_and_cmp_2(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp sge i16 %a, %b
+ %x_inv = icmp slt i16 %a, %b
+ %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x
+ %or = or i1 %and, %x_inv
+ ret i1 %or
+}
+
+; Commute the 'and':
+; (~X | (X & Y)) -> (~X | Y), where 'not' is an inverted cmp
+
+define <4 x i1> @orn_and_cmp_3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @orn_and_cmp_3(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <4 x i32> [[C:%.*]], <i32 42, i32 0, i32 1, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i1> [[X_INV]], [[Y]]
+; CHECK-NEXT: ret <4 x i1> [[OR]]
+;
+ %x = icmp ugt <4 x i32> %a, %b
+ %x_inv = icmp ule <4 x i32> %a, %b
+ %y = icmp ugt <4 x i32> %c, <i32 42, i32 0, i32 1, i32 -1> ; thwart complexity-based ordering
+ %and = and <4 x i1> %x, %y
+ %or = or <4 x i1> %x_inv, %and
+ ret <4 x i1> %or
+}
+
+; Commute the 'or':
+; ((X & Y) | ~X) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @orn_and_cmp_4(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp eq i32 %a, %b
+ %x_inv = icmp ne i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %x, %y
+ %or = or i1 %and, %x_inv
+ ret i1 %or
+}
+
+; The constant vectors are inverses. Make sure we can turn this into a select without crashing trying to truncate the constant to 16xi1.
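+; Why this becomes a shufflevector: the two constant masks are bitwise
+; complements, so the and/and/or below selects lane i from %arg where the first
+; mask is true and from %arg1 where it is false; for <16 x i1> that lanewise
+; select is printed as a shuffle whose indices 0..15 read %arg and 16..31 read %arg1.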
+define <16 x i1> @test51(<16 x i1> %arg, <16 x i1> %arg1) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[ARG:%.*]], <16 x i1> [[ARG1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <16 x i1> [[TMP1]]
+;
+ %tmp = and <16 x i1> %arg, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+ %tmp2 = and <16 x i1> %arg1, <i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>
+ %tmp3 = or <16 x i1> %tmp, %tmp2
+ ret <16 x i1> %tmp3
+}
diff --git a/llvm/test/Transforms/InstCombine/osx-names.ll b/llvm/test/Transforms/InstCombine/osx-names.ll
new file mode 100644
index 00000000000..04d842d0ec2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/osx-names.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/9815881>
+; On OSX x86-32, fwrite and fputs are not exported under those names; they are
+; exported as "fwrite$UNIX2003" and "fputs$UNIX2003". Make sure we call the correct names.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.2"
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__sFILEX = type opaque
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
+@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define void @test1(%struct.__sFILE* %stream) nounwind {
+; CHECK-LABEL: define void @test1(
+; CHECK: call i32 @"fwrite$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret void
+}
+
+define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
+; CHECK-LABEL: define void @test2(
+; CHECK: call i32 @"fputs$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+ ret void
+}
+
+declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
diff --git a/llvm/test/Transforms/InstCombine/out-of-bounds-indexes.ll b/llvm/test/Transforms/InstCombine/out-of-bounds-indexes.ll
new file mode 100644
index 00000000000..02be57a4d15
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/out-of-bounds-indexes.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Check that we don't crash on unreasonable constant indexes
+
+define i32 @test_out_of_bounds(i32 %a, i1 %x, i1 %y) {
+; CHECK-LABEL: @test_out_of_bounds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A:%.*]], 3
+; CHECK-NEXT: tail call void @llvm.assume(i1 false)
+; CHECK-NEXT: ret i32 [[AND1]]
+;
+entry:
+ %and1 = and i32 %a, 3
+ %B = lshr i32 %and1, -2147483648
+ %cmp = icmp eq i32 %B, 1
+ tail call void @llvm.assume(i1 %cmp)
+ ret i32 %and1
+}
+
+define i128 @test_non64bit(i128 %a) {
+; CHECK-LABEL: @test_non64bit(
+; CHECK-NEXT: [[AND1:%.*]] = and i128 [[A:%.*]], 3
+; CHECK-NEXT: tail call void @llvm.assume(i1 false)
+; CHECK-NEXT: ret i128 [[AND1]]
+;
+ %and1 = and i128 %a, 3
+ %B = lshr i128 %and1, -1
+ %cmp = icmp eq i128 %B, 1
+ tail call void @llvm.assume(i1 %cmp)
+ ret i128 %and1
+}
+
+declare void @llvm.assume(i1)
+
+define <4 x double> @inselt_bad_index(<4 x double> %a) {
+; CHECK-LABEL: @inselt_bad_index(
+; CHECK-NEXT: ret <4 x double> undef
+;
+ %I = insertelement <4 x double> %a, double 0.0, i64 4294967296
+ ret <4 x double> %I
+}
diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
new file mode 100644
index 00000000000..bc0504b0a96
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -0,0 +1,199 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; return mul(zext x, zext y) > MAX
+define i32 @pr4917_1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_1(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ugt i64 %mul64, 4294967295
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
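+
+; Rationale for the fold above: the product of two zext'd i32 values exceeds
+; 4294967295 (UINT32_MAX) exactly when the i32 multiply would overflow, so the
+; compare becomes the i1 result of llvm.umul.with.overflow.i32.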
+
+; return mul(zext x, zext y) >= MAX+1
+define i32 @pr4917_1a(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_1a(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp uge i64 %mul64, 4294967296
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) > MAX
+; mul(x, y) is used
+define i32 @pr4917_2(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_2(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ugt i64 %mul64, 4294967295
+; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+ %mul32 = trunc i64 %mul64 to i32
+; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+ %retval = select i1 %overflow, i32 %mul32, i32 111
+; CHECK: select i1 [[OVFL]], i32 [[VAL]]
+ ret i32 %retval
+}
+
+; return mul(zext x, zext y) > MAX
+; mul is used in non-truncate
+define i64 @pr4917_3(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_3(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK-NOT: umul.with.overflow.i32
+ %overflow = icmp ugt i64 %mul64, 4294967295
+ %retval = select i1 %overflow, i64 %mul64, i64 111
+ ret i64 %retval
+}
+
+; return mul(zext x, zext y) <= MAX
+define i32 @pr4917_4(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_4(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ule i64 %mul64, 4294967295
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+; CHECK: xor
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; return mul(zext x, zext y) < MAX+1
+define i32 @pr4917_4a(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4917_4a(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+; CHECK-NOT: zext i32
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %overflow = icmp ult i64 %mul64, 4294967296
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+; CHECK: xor
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; operands of mul are of different size
+define i32 @pr4917_5(i32 %x, i8 %y) nounwind {
+; CHECK-LABEL: @pr4917_5(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i8 %y to i64
+; CHECK: [[Y:%.*]] = zext i8 %y to i32
+ %mul64 = mul i64 %l, %r
+ %overflow = icmp ugt i64 %mul64, 4294967295
+ %mul32 = trunc i64 %mul64 to i32
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 [[Y]])
+; CHECK-DAG: [[VAL:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+; CHECK-DAG: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+ %retval = select i1 %overflow, i32 %mul32, i32 111
+; CHECK: select i1 [[OVFL]], i32 [[VAL]]
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) != zext trunc mul
+define i32 @pr4918_1(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_1(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp ne i64 %mul64, %part64
+; CHECK: [[OVFL:%.*]] = extractvalue { i32, i1 } [[MUL:%.*]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+; mul(zext x, zext y) == zext trunc mul
+define i32 @pr4918_2(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_2(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp eq i64 %mul64, %part64
+; CHECK: extractvalue { i32, i1 } [[MUL]]
+ %retval = zext i1 %overflow to i32
+; CHECK: xor
+ ret i32 %retval
+}
+
+; zext trunc mul != mul(zext x, zext y)
+define i32 @pr4918_3(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @pr4918_3(
+entry:
+ %l = zext i32 %x to i64
+ %r = zext i32 %y to i64
+ %mul64 = mul i64 %l, %r
+; CHECK: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+ %part32 = trunc i64 %mul64 to i32
+ %part64 = zext i32 %part32 to i64
+ %overflow = icmp ne i64 %part64, %mul64
+; CHECK: extractvalue { i32, i1 } [[MUL]], 1
+ %retval = zext i1 %overflow to i32
+ ret i32 %retval
+}
+
+define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: @pr20113
+; CHECK-NOT: mul.with.overflow
+; CHECK: ret
+ %vmovl.i.i726 = zext <4 x i16> %a to <4 x i32>
+ %vmovl.i.i712 = zext <4 x i16> %b to <4 x i32>
+ %mul.i703 = mul <4 x i32> %vmovl.i.i712, %vmovl.i.i726
+ %tmp = icmp sge <4 x i32> %mul.i703, zeroinitializer
+ %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
+ ret <4 x i32> %vcgez.i
+}
+
+
+; The last test needs this weird datalayout.
+target datalayout = "i32:8:8"
+; Without it, InstCombine would assume the pointee is 4-byte aligned.
+; The known-zero low bits that result from that alignment would turn:
+;    and i32 %mul, 255
+; into:
+;    and i32 %mul, 252
+; The mask would then no longer be of the form 2^n-1, which prevents the transformation.
+
+@pr21445_data = external global i32
+define i1 @pr21445(i8 %a) {
+; CHECK-LABEL: @pr21445(
+; CHECK-NEXT: %[[umul:.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 ptrtoint (i32* @pr21445_data to i8))
+; CHECK-NEXT: %[[cmp:.*]] = extractvalue { i8, i1 } %[[umul]], 1
+; CHECK-NEXT: ret i1 %[[cmp]]
+ %ext = zext i8 %a to i32
+ %mul = mul i32 %ext, zext (i8 ptrtoint (i32* @pr21445_data to i8) to i32)
+ %and = and i32 %mul, 255
+ %cmp = icmp ne i32 %mul, %and
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll
new file mode 100644
index 00000000000..f5558890d13
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/overflow.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; <rdar://problem/8558713>
+
+declare void @throwAnExceptionOrWhatever()
+
+define i32 @test1(i32 %a, i32 %b) nounwind ssp {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SADD:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
+; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i32 [[SADD_RESULT]]
+;
+entry:
+ %conv = sext i32 %a to i64
+ %conv2 = sext i32 %b to i64
+ %add = add nsw i64 %conv2, %conv
+ %add.off = add i64 %add, 2147483648
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+
+if.end:
+ %conv9 = trunc i64 %add to i32
+ ret i32 %conv9
+}
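+
+; Rough reasoning for the fold above: %add is the exact sum of two sign-extended
+; i32 values, and (%add + 2147483648) ugt 4294967295 holds exactly when %add is
+; outside [-2^31, 2^31 - 1], i.e. when the i32 addition would overflow; that is
+; the i1 result of llvm.sadd.with.overflow.i32 in the CHECK lines.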
+
+; This form should not be promoted for two reasons: 1) it is unprofitable to
+; promote it since the add.off instruction has another use, and 2) it is unsafe
+; because the add-with-off makes the high bits of the original add live.
+
+define i32 @test2(i32 %a, i32 %b, i64* %P) nounwind ssp {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV2]], [[CONV]]
+; CHECK-NEXT: [[ADD_OFF:%.*]] = add nsw i64 [[ADD]], 2147483648
+; CHECK-NEXT: store i64 [[ADD_OFF]], i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT: ret i32 [[CONV9]]
+;
+entry:
+ %conv = sext i32 %a to i64
+ %conv2 = sext i32 %b to i64
+ %add = add nsw i64 %conv2, %conv
+ %add.off = add i64 %add, 2147483648
+ store i64 %add.off, i64* %P
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+
+if.end:
+ %conv9 = trunc i64 %add to i32
+ ret i32 %conv9
+}
+
+; PR8816
+; This is illegal to transform because the high bits of the original add are
+; live out.
+define i64 @test3(i32 %a, i32 %b) nounwind ssp {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV2]], [[CONV]]
+; CHECK-NEXT: [[ADD_OFF:%.*]] = add nsw i64 [[ADD]], 2147483648
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret i64 [[ADD]]
+;
+entry:
+ %conv = sext i32 %a to i64
+ %conv2 = sext i32 %b to i64
+ %add = add nsw i64 %conv2, %conv
+ %add.off = add i64 %add, 2147483648
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+
+if.end:
+ ret i64 %add
+}
+
+; Should be able to form an i8 sadd computed in an i32.
+
+define zeroext i8 @test4(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SADD:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[B:%.*]], i8 [[A:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = extractvalue { i8, i1 } [[SADD]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
+; CHECK-NEXT: unreachable
+; CHECK: if.end:
+; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i8, i1 } [[SADD]], 0
+; CHECK-NEXT: ret i8 [[SADD_RESULT]]
+;
+entry:
+ %conv = sext i8 %a to i32
+ %conv2 = sext i8 %b to i32
+ %add = add nsw i32 %conv2, %conv
+ %add4 = add nsw i32 %add, 128
+ %cmp = icmp ugt i32 %add4, 255
+ br i1 %cmp, label %if.then, label %if.end
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ unreachable
+
+if.end:
+ %conv7 = trunc i32 %add to i8
+ ret i8 %conv7
+}
+
+; PR11438
+; This is @test1, but the operands are not sign-extended. Make sure
+; we don't transform this case.
+
+define i32 @test8(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD_OFF:%.*]] = add i64 [[ADD]], 2147483648
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295
+; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32
+; CHECK-NEXT: ret i32 [[CONV9]]
+;
+entry:
+ %add = add i64 %a, %b
+ %add.off = add i64 %add, 2147483648
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+
+if.end:
+ %conv9 = trunc i64 %add to i32
+ ret i32 %conv9
+}
+
diff --git a/llvm/test/Transforms/InstCombine/phi-load-metadata-2.ll b/llvm/test/Transforms/InstCombine/phi-load-metadata-2.ll
new file mode 100644
index 00000000000..cfbf2dea8a7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-load-metadata-2.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/llvm/test/Transforms/InstCombine/phi-load-metadata-3.ll b/llvm/test/Transforms/InstCombine/phi-load-metadata-3.ll
new file mode 100644
index 00000000000..39049c9c718
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-load-metadata-3.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable_or_null metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable_or_null !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable_or_null !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/llvm/test/Transforms/InstCombine/phi-load-metadata-dominance.ll b/llvm/test/Transforms/InstCombine/phi-load-metadata-dominance.ll
new file mode 100644
index 00000000000..0c5aab85890
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-load-metadata-dominance.ll
@@ -0,0 +1,26 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that nonnull metadata from non-dominating loads is not propagated.
+; CHECK-LABEL: cont:
+; CHECK-NOT: !nonnull
+define i32* @test_combine_metadata_dominance(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !nonnull !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+!0 = !{}
diff --git a/llvm/test/Transforms/InstCombine/phi-load-metadata.ll b/llvm/test/Transforms/InstCombine/phi-load-metadata.ll
new file mode 100644
index 00000000000..004a355ca44
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-load-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that align metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !align !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !align !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[ALIGN]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/llvm/test/Transforms/InstCombine/phi-merge-gep.ll b/llvm/test/Transforms/InstCombine/phi-merge-gep.ll
new file mode 100644
index 00000000000..b548e51a56a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-merge-gep.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -S -instcombine > %t
+; RUN: grep "= getelementptr" %t | count 20
+; RUN: grep "= phi" %t | count 13
+
+; Don't push the geps through these phis, because they would require
+; two phis each, which burdens the loop with high register pressure.
+
+define void @foo(float* %Ar, float* %Ai, i64 %As, float* %Cr, float* %Ci, i64 %Cs, i64 %n) nounwind {
+entry:
+ %0 = getelementptr inbounds float, float* %Ar, i64 0 ; <float*> [#uses=1]
+ %1 = getelementptr inbounds float, float* %Ai, i64 0 ; <float*> [#uses=1]
+ %2 = mul i64 %n, %As ; <i64> [#uses=1]
+ %3 = getelementptr inbounds float, float* %Ar, i64 %2 ; <float*> [#uses=1]
+ %4 = mul i64 %n, %As ; <i64> [#uses=1]
+ %5 = getelementptr inbounds float, float* %Ai, i64 %4 ; <float*> [#uses=1]
+ %6 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %7 = mul i64 %6, %As ; <i64> [#uses=1]
+ %8 = getelementptr inbounds float, float* %Ar, i64 %7 ; <float*> [#uses=1]
+ %9 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %10 = mul i64 %9, %As ; <i64> [#uses=1]
+ %11 = getelementptr inbounds float, float* %Ai, i64 %10 ; <float*> [#uses=1]
+ %12 = getelementptr inbounds float, float* %Cr, i64 0 ; <float*> [#uses=1]
+ %13 = getelementptr inbounds float, float* %Ci, i64 0 ; <float*> [#uses=1]
+ %14 = mul i64 %n, %Cs ; <i64> [#uses=1]
+ %15 = getelementptr inbounds float, float* %Cr, i64 %14 ; <float*> [#uses=1]
+ %16 = mul i64 %n, %Cs ; <i64> [#uses=1]
+ %17 = getelementptr inbounds float, float* %Ci, i64 %16 ; <float*> [#uses=1]
+ %18 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %19 = mul i64 %18, %Cs ; <i64> [#uses=1]
+ %20 = getelementptr inbounds float, float* %Cr, i64 %19 ; <float*> [#uses=1]
+ %21 = mul i64 %n, 2 ; <i64> [#uses=1]
+ %22 = mul i64 %21, %Cs ; <i64> [#uses=1]
+ %23 = getelementptr inbounds float, float* %Ci, i64 %22 ; <float*> [#uses=1]
+ br label %bb13
+
+bb: ; preds = %bb13
+ %24 = load float, float* %A0r.0, align 4 ; <float> [#uses=1]
+ %25 = load float, float* %A0i.0, align 4 ; <float> [#uses=1]
+ %26 = load float, float* %A1r.0, align 4 ; <float> [#uses=2]
+ %27 = load float, float* %A1i.0, align 4 ; <float> [#uses=2]
+ %28 = load float, float* %A2r.0, align 4 ; <float> [#uses=2]
+ %29 = load float, float* %A2i.0, align 4 ; <float> [#uses=2]
+ %30 = fadd float %26, %28 ; <float> [#uses=2]
+ %31 = fadd float %27, %29 ; <float> [#uses=2]
+ %32 = fsub float %26, %28 ; <float> [#uses=1]
+ %33 = fsub float %27, %29 ; <float> [#uses=1]
+ %34 = fadd float %24, %30 ; <float> [#uses=2]
+ %35 = fadd float %25, %31 ; <float> [#uses=2]
+ %36 = fmul float %30, -1.500000e+00 ; <float> [#uses=1]
+ %37 = fmul float %31, -1.500000e+00 ; <float> [#uses=1]
+ %38 = fadd float %34, %36 ; <float> [#uses=2]
+ %39 = fadd float %35, %37 ; <float> [#uses=2]
+ %40 = fmul float %32, 0x3FEBB67AE0000000 ; <float> [#uses=2]
+ %41 = fmul float %33, 0x3FEBB67AE0000000 ; <float> [#uses=2]
+ %42 = fadd float %38, %41 ; <float> [#uses=1]
+ %43 = fsub float %39, %40 ; <float> [#uses=1]
+ %44 = fsub float %38, %41 ; <float> [#uses=1]
+ %45 = fadd float %39, %40 ; <float> [#uses=1]
+ store float %34, float* %C0r.0, align 4
+ store float %35, float* %C0i.0, align 4
+ store float %42, float* %C1r.0, align 4
+ store float %43, float* %C1i.0, align 4
+ store float %44, float* %C2r.0, align 4
+ store float %45, float* %C2i.0, align 4
+ %46 = getelementptr inbounds float, float* %A0r.0, i64 %As ; <float*> [#uses=1]
+ %47 = getelementptr inbounds float, float* %A0i.0, i64 %As ; <float*> [#uses=1]
+ %48 = getelementptr inbounds float, float* %A1r.0, i64 %As ; <float*> [#uses=1]
+ %49 = getelementptr inbounds float, float* %A1i.0, i64 %As ; <float*> [#uses=1]
+ %50 = getelementptr inbounds float, float* %A2r.0, i64 %As ; <float*> [#uses=1]
+ %51 = getelementptr inbounds float, float* %A2i.0, i64 %As ; <float*> [#uses=1]
+ %52 = getelementptr inbounds float, float* %C0r.0, i64 %Cs ; <float*> [#uses=1]
+ %53 = getelementptr inbounds float, float* %C0i.0, i64 %Cs ; <float*> [#uses=1]
+ %54 = getelementptr inbounds float, float* %C1r.0, i64 %Cs ; <float*> [#uses=1]
+ %55 = getelementptr inbounds float, float* %C1i.0, i64 %Cs ; <float*> [#uses=1]
+ %56 = getelementptr inbounds float, float* %C2r.0, i64 %Cs ; <float*> [#uses=1]
+ %57 = getelementptr inbounds float, float* %C2i.0, i64 %Cs ; <float*> [#uses=1]
+ %58 = add nsw i64 %i.0, 1 ; <i64> [#uses=1]
+ br label %bb13
+
+bb13: ; preds = %bb, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %58, %bb ] ; <i64> [#uses=2]
+ %C2i.0 = phi float* [ %23, %entry ], [ %57, %bb ] ; <float*> [#uses=2]
+ %C2r.0 = phi float* [ %20, %entry ], [ %56, %bb ] ; <float*> [#uses=2]
+ %C1i.0 = phi float* [ %17, %entry ], [ %55, %bb ] ; <float*> [#uses=2]
+ %C1r.0 = phi float* [ %15, %entry ], [ %54, %bb ] ; <float*> [#uses=2]
+ %C0i.0 = phi float* [ %13, %entry ], [ %53, %bb ] ; <float*> [#uses=2]
+ %C0r.0 = phi float* [ %12, %entry ], [ %52, %bb ] ; <float*> [#uses=2]
+ %A2i.0 = phi float* [ %11, %entry ], [ %51, %bb ] ; <float*> [#uses=2]
+ %A2r.0 = phi float* [ %8, %entry ], [ %50, %bb ] ; <float*> [#uses=2]
+ %A1i.0 = phi float* [ %5, %entry ], [ %49, %bb ] ; <float*> [#uses=2]
+ %A1r.0 = phi float* [ %3, %entry ], [ %48, %bb ] ; <float*> [#uses=2]
+ %A0i.0 = phi float* [ %1, %entry ], [ %47, %bb ] ; <float*> [#uses=2]
+ %A0r.0 = phi float* [ %0, %entry ], [ %46, %bb ] ; <float*> [#uses=2]
+ %59 = icmp slt i64 %i.0, %n ; <i1> [#uses=1]
+ br i1 %59, label %bb, label %bb14
+
+bb14: ; preds = %bb13
+ br label %return
+
+return: ; preds = %bb14
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/phi-preserve-ir-flags.ll b/llvm/test/Transforms/InstCombine/phi-preserve-ir-flags.ll
new file mode 100644
index 00000000000..6e3ae8087cb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-preserve-ir-flags.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: define float @func1(
+define float @func1(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, %b
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub fast float %a, %c
+ br label %cond.end
+
+; The fast-math flags should always be transferred if possible.
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %b, %cond.true ], [ %c, %cond.false ]
+; CHECK fsub fast float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func2(
+define float @func2(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, %b
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub float %a, %c
+ br label %cond.end
+
+; The fast-math flags should always be transferred if possible.
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %b, %cond.true ], [ %c, %cond.false ]
+; CHECK fsub float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func3(
+define float @func3(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, 2.0
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub fast float %b, 2.0
+ br label %cond.end
+
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %a, %cond.true ], [ %b, %cond.false ]
+; CHECK fadd fast float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func4(
+define float @func4(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, 2.0
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub float %b, 2.0
+ br label %cond.end
+
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %a, %cond.true ], [ %b, %cond.false ]
+; CHECK fadd float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
diff --git a/llvm/test/Transforms/InstCombine/phi-select-constant.ll b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
new file mode 100644
index 00000000000..9d1c973925b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -instcombine | FileCheck %s
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[USE2]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
+ %value = select i1 %use2, i32 2, i32 1
+ ret i32 %value
+}
+
+
+; test folding of select into phi for vectors.
+define <4 x i64> @vec1(i1 %which) {
+; CHECK-LABEL: @vec1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[PHINODE:%.*]] = phi <4 x i64> [ zeroinitializer, [[ENTRY:%.*]] ], [ <i64 0, i64 0, i64 126, i64 127>, [[DELAY]] ]
+; CHECK-NEXT: ret <4 x i64> [[PHINODE]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %phinode = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, %entry ], [ <i1 true, i1 true, i1 false, i1 false>, %delay ]
+ %sel = select <4 x i1> %phinode, <4 x i64> zeroinitializer, <4 x i64> <i64 124, i64 125, i64 126, i64 127>
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @vec2(i1 %which) {
+; CHECK-LABEL: @vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[PHINODE:%.*]] = phi <4 x i64> [ <i64 124, i64 125, i64 126, i64 127>, [[ENTRY:%.*]] ], [ <i64 0, i64 125, i64 0, i64 127>, [[DELAY]] ]
+; CHECK-NEXT: ret <4 x i64> [[PHINODE]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %phinode = phi <4 x i1> [ <i1 false, i1 false, i1 false, i1 false>, %entry ], [ <i1 true, i1 false, i1 true, i1 false>, %delay ]
+ %sel = select <4 x i1> %phinode, <4 x i64> zeroinitializer, <4 x i64> <i64 124, i64 125, i64 126, i64 127>
+ ret <4 x i64> %sel
+}
+
+; Test PR33364
+; Insert the generated select into the same block as the incoming phi value.
+; The phi has constant vectors along with a single non-constant vector as operands.
+define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @vec3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[IF1:%.*]], label [[ELSE:%.*]]
+; CHECK: if1:
+; CHECK-NEXT: [[PHITMP2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> [[Z]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[IF2:%.*]], label [[ELSE]]
+; CHECK: if2:
+; CHECK-NEXT: [[PHITMP:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> [[Y]], <2 x i8> [[Z]]
+; CHECK-NEXT: br label [[ELSE]]
+; CHECK: else:
+; CHECK-NEXT: [[PHI:%.*]] = phi <2 x i8> [ [[PHITMP]], [[IF2]] ], [ [[PHITMP1]], [[ENTRY:%.*]] ], [ [[PHITMP2]], [[IF1]] ]
+; CHECK-NEXT: ret <2 x i8> [[PHI]]
+;
+entry:
+ br i1 %cond1, label %if1, label %else
+
+if1:
+ br i1 %cond2, label %if2, label %else
+
+if2:
+ br label %else
+
+else:
+ %phi = phi <2 x i1> [ %x, %if2 ], [ <i1 0, i1 1>, %entry ], [ <i1 1, i1 0>, %if1 ]
+ %sel = select <2 x i1> %phi, <2 x i8> %y, <2 x i8> %z
+ ret <2 x i8> %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/phi-timeout.ll b/llvm/test/Transforms/InstCombine/phi-timeout.ll
new file mode 100644
index 00000000000..883807ecd81
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi-timeout.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S 2>&1 | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; We are really checking that this doesn't loop forever. We would never
+; actually get to the checks here if it did.
+
+define void @timeout(i16* nocapture readonly %cinfo) {
+; CHECK-LABEL: @timeout(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, i16* [[CINFO:%.*]], i32 2
+; CHECK-NEXT: [[L:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2
+; CHECK-NEXT: [[CMP17:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: [[EXTRACT_T1:%.*]] = trunc i16 [[L]] to i8
+; CHECK-NEXT: br i1 [[CMP17]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2
+; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i16 [[DOTPRE]] to i8
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[P_OFF0:%.*]] = phi i8 [ [[EXTRACT_T]], [[IF_THEN]] ], [ [[EXTRACT_T1]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUB:%.*]] = add i8 [[P_OFF0]], -1
+; CHECK-NEXT: store i8 [[SUB]], i8* undef, align 1
+; CHECK-NEXT: br label [[FOR_BODY]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %arrayidx15 = getelementptr inbounds i16, i16* %cinfo, i32 2
+ %l = load i16, i16* %arrayidx15, align 2
+ %cmp17 = icmp eq i16 %l, 0
+ br i1 %cmp17, label %if.then, label %if.end
+
+if.then:
+ %.pre = load i16, i16* %arrayidx15, align 2
+ br label %if.end
+
+if.end:
+ %p = phi i16 [ %.pre, %if.then ], [ %l, %for.body ]
+ %conv19 = trunc i16 %p to i8
+ %sub = add i8 %conv19, -1
+ store i8 %sub, i8* undef, align 1
+ br label %for.body
+}
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
new file mode 100644
index 00000000000..c417737fdf2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -0,0 +1,881 @@
+; This test makes sure that these instructions are properly eliminated.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define i32 @test1(i32 %A, i1 %b) {
+BB0:
+ br i1 %b, label %BB1, label %BB2
+
+BB1:
+ ; Combine away single-argument PHI nodes
+ %B = phi i32 [ %A, %BB0 ]
+ ret i32 %B
+
+BB2:
+ ret i32 %A
+; CHECK-LABEL: @test1(
+; CHECK: BB1:
+; CHECK-NEXT: ret i32 %A
+}
+
+define i32 @test2(i32 %A, i1 %b) {
+BB0:
+ br i1 %b, label %BB1, label %BB2
+
+BB1:
+ br label %BB2
+
+BB2:
+ ; Combine away PHI nodes with same values
+ %B = phi i32 [ %A, %BB0 ], [ %A, %BB1 ]
+ ret i32 %B
+; CHECK-LABEL: @test2(
+; CHECK: BB2:
+; CHECK-NEXT: ret i32 %A
+}
+
+define i32 @test3(i32 %A, i1 %b) {
+BB0:
+ br label %Loop
+
+Loop:
+ ; PHI has same value always.
+ %B = phi i32 [ %A, %BB0 ], [ %B, %Loop ]
+ br i1 %b, label %Loop, label %Exit
+
+Exit:
+ ret i32 %B
+; CHECK-LABEL: @test3(
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %A
+}
+
+define i32 @test4(i1 %b) {
+BB0:
+ ; Loop is unreachable
+ ret i32 7
+
+Loop: ; preds = %L2, %Loop
+ ; PHI has same value always.
+ %B = phi i32 [ %B, %L2 ], [ %B, %Loop ]
+ br i1 %b, label %L2, label %Loop
+
+L2: ; preds = %Loop
+ br label %Loop
+; CHECK-LABEL: @test4(
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
+}
+
+define i32 @test5(i32 %A, i1 %b) {
+BB0:
+ br label %Loop
+
+Loop: ; preds = %Loop, %BB0
+ ; PHI has same value always.
+ %B = phi i32 [ %A, %BB0 ], [ undef, %Loop ]
+ br i1 %b, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i32 %B
+; CHECK-LABEL: @test5(
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %A
+}
+
+define i32 @test6(i16 %A, i1 %b) {
+BB0:
+ %X = zext i16 %A to i32
+ br i1 %b, label %BB1, label %BB2
+
+BB1:
+ %Y = zext i16 %A to i32
+ br label %BB2
+
+BB2:
+ ;; Suck casts into phi
+ %B = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]
+ ret i32 %B
+; CHECK-LABEL: @test6(
+; CHECK: BB2:
+; CHECK: zext i16 %A to i32
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test7(i32 %A, i1 %b) {
+BB0:
+ br label %Loop
+
+Loop: ; preds = %Loop, %BB0
+ ; PHI is dead.
+ %B = phi i32 [ %A, %BB0 ], [ %C, %Loop ]
+ %C = add i32 %B, 123
+ br i1 %b, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i32 0
+; CHECK-LABEL: @test7(
+; CHECK: Loop:
+; CHECK-NEXT: br i1 %b
+}
+
+define i32* @test8({ i32, i32 } *%A, i1 %b) {
+BB0:
+ %X = getelementptr inbounds { i32, i32 }, { i32, i32 } *%A, i32 0, i32 1
+ br i1 %b, label %BB1, label %BB2
+
+BB1:
+ %Y = getelementptr { i32, i32 }, { i32, i32 } *%A, i32 0, i32 1
+ br label %BB2
+
+BB2:
+ ;; Suck GEPs into phi
+ %B = phi i32* [ %X, %BB0 ], [ %Y, %BB1 ]
+ ret i32* %B
+; CHECK-LABEL: @test8(
+; CHECK-NOT: phi
+; CHECK: BB2:
+; CHECK-NEXT: %B = getelementptr { i32, i32 }, { i32, i32 }* %A
+; CHECK-NEXT: ret i32* %B
+}
+
+define i32 @test9(i32* %A, i32* %B) {
+entry:
+ %c = icmp eq i32* %A, null
+ br i1 %c, label %bb1, label %bb
+
+bb:
+ %C = load i32, i32* %B, align 1
+ br label %bb2
+
+bb1:
+ %D = load i32, i32* %A, align 1
+ br label %bb2
+
+bb2:
+ %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
+ ret i32 %E
+; CHECK-LABEL: @test9(
+; CHECK: bb2:
+; CHECK-NEXT: phi i32* [ %B, %bb ], [ %A, %bb1 ]
+; CHECK-NEXT: %E = load i32, i32* %{{[^,]*}}, align 1
+; CHECK-NEXT: ret i32 %E
+
+}
+
+define i32 @test10(i32* %A, i32* %B) {
+entry:
+ %c = icmp eq i32* %A, null
+ br i1 %c, label %bb1, label %bb
+
+bb:
+ %C = load i32, i32* %B, align 16
+ br label %bb2
+
+bb1:
+ %D = load i32, i32* %A, align 32
+ br label %bb2
+
+bb2:
+ %E = phi i32 [ %C, %bb ], [ %D, %bb1 ]
+ ret i32 %E
+; CHECK-LABEL: @test10(
+; CHECK: bb2:
+; CHECK-NEXT: phi i32* [ %B, %bb ], [ %A, %bb1 ]
+; CHECK-NEXT: %E = load i32, i32* %{{[^,]*}}, align 16
+; CHECK-NEXT: ret i32 %E
+}
+
+
+; PR1777
+declare i1 @test11a()
+
+define i1 @test11() {
+entry:
+ %a = alloca i32
+ %i = ptrtoint i32* %a to i64
+ %b = call i1 @test11a()
+ br i1 %b, label %one, label %two
+
+one:
+ %x = phi i64 [%i, %entry], [%y, %two]
+ %c = call i1 @test11a()
+ br i1 %c, label %two, label %end
+
+two:
+ %y = phi i64 [%i, %entry], [%x, %one]
+ %d = call i1 @test11a()
+ br i1 %d, label %one, label %end
+
+end:
+ %f = phi i64 [ %x, %one], [%y, %two]
+ ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+ ; even though %f must equal %i at this point
+ %g = inttoptr i64 %f to i32*
+ store i32 10, i32* %g
+ %z = call i1 @test11a()
+ ret i1 %z
+; CHECK-LABEL: @test11(
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
+
+
+define i64 @test12(i1 %cond, i8* %Ptr, i64 %Val) {
+entry:
+ %tmp41 = ptrtoint i8* %Ptr to i64
+ %tmp42 = zext i64 %tmp41 to i128
+ br i1 %cond, label %end, label %two
+
+two:
+ %tmp36 = zext i64 %Val to i128 ; <i128> [#uses=1]
+ %tmp37 = shl i128 %tmp36, 64 ; <i128> [#uses=1]
+ %ins39 = or i128 %tmp42, %tmp37 ; <i128> [#uses=1]
+ br label %end
+
+end:
+ %tmp869.0 = phi i128 [ %tmp42, %entry ], [ %ins39, %two ]
+ %tmp32 = trunc i128 %tmp869.0 to i64 ; <i64> [#uses=1]
+ %tmp29 = lshr i128 %tmp869.0, 64 ; <i128> [#uses=1]
+ %tmp30 = trunc i128 %tmp29 to i64 ; <i64> [#uses=1]
+
+ %tmp2 = add i64 %tmp32, %tmp30
+ ret i64 %tmp2
+; CHECK-LABEL: @test12(
+; CHECK-NOT: zext
+; CHECK: end:
+; CHECK-NEXT: phi i64 [ 0, %entry ], [ %Val, %two ]
+; CHECK-NOT: phi
+; CHECK: ret i64
+}
+
+declare void @test13f(double, i32)
+
+define void @test13(i1 %cond, i32 %V1, double %Vald) {
+entry:
+ %tmp42 = zext i32 %V1 to i128
+ br i1 %cond, label %end, label %two
+
+two:
+ %Val = bitcast double %Vald to i64
+ %tmp36 = zext i64 %Val to i128 ; <i128> [#uses=1]
+ %tmp37 = shl i128 %tmp36, 64 ; <i128> [#uses=1]
+ %ins39 = or i128 %tmp42, %tmp37 ; <i128> [#uses=1]
+ br label %end
+
+end:
+ %tmp869.0 = phi i128 [ %tmp42, %entry ], [ %ins39, %two ]
+ %tmp32 = trunc i128 %tmp869.0 to i32
+ %tmp29 = lshr i128 %tmp869.0, 64 ; <i128> [#uses=1]
+ %tmp30 = trunc i128 %tmp29 to i64 ; <i64> [#uses=1]
+ %tmp31 = bitcast i64 %tmp30 to double
+
+ call void @test13f(double %tmp31, i32 %tmp32)
+ ret void
+; CHECK-LABEL: @test13(
+; CHECK-NOT: zext
+; CHECK: end:
+; CHECK-NEXT: phi double [ 0.000000e+00, %entry ], [ %Vald, %two ]
+; CHECK-NEXT: call void @test13f(double {{[^,]*}}, i32 %V1)
+; CHECK: ret void
+}
+
+define i640 @test14a(i320 %A, i320 %B, i1 %b1) {
+BB0:
+ %a = zext i320 %A to i640
+ %b = zext i320 %B to i640
+ br label %Loop
+
+Loop:
+ %C = phi i640 [ %a, %BB0 ], [ %b, %Loop ]
+ br i1 %b1, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i640 %C
+; CHECK-LABEL: @test14a(
+; CHECK: Loop:
+; CHECK-NEXT: phi i320
+}
+
+define i160 @test14b(i320 %A, i320 %B, i1 %b1) {
+BB0:
+ %a = trunc i320 %A to i160
+ %b = trunc i320 %B to i160
+ br label %Loop
+
+Loop:
+ %C = phi i160 [ %a, %BB0 ], [ %b, %Loop ]
+ br i1 %b1, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ ret i160 %C
+; CHECK-LABEL: @test14b(
+; CHECK: Loop:
+; CHECK-NEXT: phi i160
+}
+
+declare i64 @test15a(i64)
+
+define i64 @test15b(i64 %A, i1 %b) {
+; CHECK-LABEL: @test15b(
+entry:
+ %i0 = zext i64 %A to i128
+ %i1 = shl i128 %i0, 64
+ %i = or i128 %i1, %i0
+ br i1 %b, label %one, label %two
+; CHECK: entry:
+; CHECK-NEXT: br i1 %b
+
+one:
+ %x = phi i128 [%i, %entry], [%y, %two]
+ %x1 = lshr i128 %x, 64
+ %x2 = trunc i128 %x1 to i64
+ %c = call i64 @test15a(i64 %x2)
+ %c1 = zext i64 %c to i128
+ br label %two
+
+; CHECK: one:
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: %c = call i64 @test15a
+
+two:
+ %y = phi i128 [%i, %entry], [%c1, %one]
+ %y1 = lshr i128 %y, 64
+ %y2 = trunc i128 %y1 to i64
+ %d = call i64 @test15a(i64 %y2)
+ %d1 = trunc i64 %d to i1
+ br i1 %d1, label %one, label %end
+
+; CHECK: two:
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: phi i64
+; CHECK-NEXT: %d = call i64 @test15a
+
+end:
+ %g = trunc i128 %y to i64
+ ret i64 %g
+; CHECK: end:
+; CHECK-NEXT: ret i64
+}
+
+; PR6512 - Shouldn't merge loads from different addr spaces.
+define i32 @test16(i32 addrspace(1)* %pointer1, i32 %flag, i32* %pointer2)
+nounwind {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %pointer1.addr = alloca i32 addrspace(1)*, align 4 ; <i32 addrspace(1)**>
+ %flag.addr = alloca i32, align 4 ; <i32*> [#uses=2]
+ %pointer2.addr = alloca i32*, align 4 ; <i32**> [#uses=2]
+ %res = alloca i32, align 4 ; <i32*> [#uses=4]
+ store i32 addrspace(1)* %pointer1, i32 addrspace(1)** %pointer1.addr
+ store i32 %flag, i32* %flag.addr
+ store i32* %pointer2, i32** %pointer2.addr
+ store i32 10, i32* %res
+ %tmp = load i32, i32* %flag.addr ; <i32> [#uses=1]
+ %tobool = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %tobool, label %if.then, label %if.else
+
+return: ; preds = %if.end
+ %tmp7 = load i32, i32* %retval ; <i32> [#uses=1]
+ ret i32 %tmp7
+
+if.end: ; preds = %if.else, %if.then
+ %tmp6 = load i32, i32* %res ; <i32> [#uses=1]
+ store i32 %tmp6, i32* %retval
+ br label %return
+
+if.then: ; preds = %entry
+ %tmp1 = load i32 addrspace(1)*, i32 addrspace(1)** %pointer1.addr ; <i32 addrspace(1)*>
+ %arrayidx = getelementptr i32, i32 addrspace(1)* %tmp1, i32 0 ; <i32 addrspace(1)*> [#uses=1]
+ %tmp2 = load i32, i32 addrspace(1)* %arrayidx ; <i32> [#uses=1]
+ store i32 %tmp2, i32* %res
+ br label %if.end
+
+if.else: ; preds = %entry
+ %tmp3 = load i32*, i32** %pointer2.addr ; <i32*> [#uses=1]
+ %arrayidx4 = getelementptr i32, i32* %tmp3, i32 0 ; <i32*> [#uses=1]
+ %tmp5 = load i32, i32* %arrayidx4 ; <i32> [#uses=1]
+ store i32 %tmp5, i32* %res
+ br label %if.end
+}
+
+; PR4413
+declare i32 @ext()
+; CHECK-LABEL: @test17(
+define i32 @test17(i1 %a) {
+entry:
+ br i1 %a, label %bb1, label %bb2
+
+bb1: ; preds = %entry
+ %0 = tail call i32 @ext() ; <i32> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %entry
+ %cond = phi i1 [ true, %bb1 ], [ false, %entry ] ; <i1> [#uses=1]
+; CHECK-NOT: %val = phi i32 [ %0, %bb1 ], [ 0, %entry ]
+ %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] ; <i32> [#uses=1]
+ %res = select i1 %cond, i32 %val, i32 0 ; <i32> [#uses=1]
+; CHECK: ret i32 %cond
+ ret i32 %res
+}
+
+define i1 @test18(i1 %cond) {
+ %zero = alloca i32
+ %one = alloca i32
+ br i1 %cond, label %true, label %false
+true:
+ br label %ret
+false:
+ br label %ret
+ret:
+ %ptr = phi i32* [ %zero, %true ] , [ %one, %false ]
+ %isnull = icmp eq i32* %ptr, null
+ ret i1 %isnull
+; CHECK-LABEL: @test18(
+; CHECK: ret i1 false
+}
+
+define i1 @test19(i1 %cond, double %x) {
+ br i1 %cond, label %true, label %false
+true:
+ br label %ret
+false:
+ br label %ret
+ret:
+ %p = phi double [ %x, %true ], [ 0x7FF0000000000000, %false ]; RHS = +infty
+ %cmp = fcmp ule double %x, %p
+ ret i1 %cmp
+; CHECK-LABEL: @test19(
+; CHECK: ret i1 true
+}
+
+define i1 @test20(i1 %cond) {
+ %a = alloca i32
+ %b = alloca i32
+ %c = alloca i32
+ br i1 %cond, label %true, label %false
+true:
+ br label %ret
+false:
+ br label %ret
+ret:
+ %p = phi i32* [ %a, %true ], [ %b, %false ]
+ %r = icmp eq i32* %p, %c
+ ret i1 %r
+; CHECK-LABEL: @test20(
+; CHECK: ret i1 false
+}
+
+define i1 @test21(i1 %c1, i1 %c2) {
+ %a = alloca i32
+ %b = alloca i32
+ %c = alloca i32
+ br i1 %c1, label %true, label %false
+true:
+ br label %loop
+false:
+ br label %loop
+loop:
+ %p = phi i32* [ %a, %true ], [ %b, %false ], [ %p, %loop ]
+ %r = icmp eq i32* %p, %c
+ br i1 %c2, label %ret, label %loop
+ret:
+ ret i1 %r
+; CHECK-LABEL: @test21(
+; CHECK: ret i1 false
+}
+
+define void @test22() {
+; CHECK-LABEL: @test22(
+entry:
+ br label %loop
+loop:
+ %phi = phi i32 [ 0, %entry ], [ %y, %loop ]
+ %y = add i32 %phi, 1
+ %o = or i32 %y, %phi
+ %e = icmp eq i32 %o, %y
+ br i1 %e, label %loop, label %ret
+; CHECK: br i1 %e
+ret:
+ ret void
+}
+
+define i32 @test23(i32 %A, i1 %b, i32 * %P) {
+BB0:
+ br label %Loop
+
+Loop: ; preds = %Loop, %BB0
+ ; The adds of 19 below get folded into the PHI's incoming values.
+ %B = phi i32 [ %A, %BB0 ], [ 42, %Loop ]
+ %D = add i32 %B, 19
+ store i32 %D, i32* %P
+ br i1 %b, label %Loop, label %Exit
+
+Exit: ; preds = %Loop
+ %E = add i32 %B, 19
+ ret i32 %E
+; CHECK-LABEL: @test23(
+; CHECK: %phitmp = add i32 %A, 19
+; CHECK: Loop:
+; CHECK-NEXT: %B = phi i32 [ %phitmp, %BB0 ], [ 61, %Loop ]
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test24(i32 %A, i1 %cond) {
+BB0:
+ %X = add nuw nsw i32 %A, 1
+ br i1 %cond, label %BB1, label %BB2
+
+BB1:
+ %Y = add nuw i32 %A, 1
+ br label %BB2
+
+BB2:
+ %C = phi i32 [ %X, %BB0 ], [ %Y, %BB1 ]
+ ret i32 %C
+; CHECK-LABEL: @test24(
+; CHECK-NOT: phi
+; CHECK: BB2:
+; CHECK-NEXT: %C = add nuw i32 %A, 1
+; CHECK-NEXT: ret i32 %C
+}
+
+; Same as test11, but used to be missed due to a bug.
+declare i1 @test25a()
+
+define i1 @test25() {
+entry:
+ %a = alloca i32
+ %i = ptrtoint i32* %a to i64
+ %b = call i1 @test25a()
+ br i1 %b, label %one, label %two
+
+one:
+ %x = phi i64 [%y, %two], [%i, %entry]
+ %c = call i1 @test25a()
+ br i1 %c, label %two, label %end
+
+two:
+ %y = phi i64 [%x, %one], [%i, %entry]
+ %d = call i1 @test25a()
+ br i1 %d, label %one, label %end
+
+end:
+ %f = phi i64 [ %x, %one], [%y, %two]
+ ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+ ; even though %f must equal %i at this point
+ %g = inttoptr i64 %f to i32*
+ store i32 10, i32* %g
+ %z = call i1 @test25a()
+ ret i1 %z
+; CHECK-LABEL: @test25(
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
+
+declare i1 @test26a()
+
+define i1 @test26(i32 %n) {
+entry:
+ %a = alloca i32
+ %i = ptrtoint i32* %a to i64
+ %b = call i1 @test26a()
+ br label %one
+
+one:
+ %x = phi i64 [%y, %two], [%w, %three], [%i, %entry]
+ %c = call i1 @test26a()
+ switch i32 %n, label %end [
+ i32 2, label %two
+ i32 3, label %three
+ ]
+
+two:
+ %y = phi i64 [%x, %one], [%w, %three]
+ %d = call i1 @test26a()
+ switch i32 %n, label %end [
+ i32 10, label %one
+ i32 30, label %three
+ ]
+
+three:
+ %w = phi i64 [%y, %two], [%x, %one]
+ %e = call i1 @test26a()
+ br i1 %e, label %one, label %two
+
+end:
+ %f = phi i64 [ %x, %one], [%y, %two]
+ ; Change the %f to %i, and the optimizer suddenly becomes a lot smarter
+ ; even though %f must equal %i at this point
+ %g = inttoptr i64 %f to i32*
+ store i32 10, i32* %g
+ %z = call i1 @test26a()
+ ret i1 %z
+; CHECK-LABEL: @test26(
+; CHECK-NOT: phi i32
+; CHECK: ret i1 %z
+}
+
+; CHECK-LABEL: @test27(
+; CHECK: ret i32 undef
+define i32 @test27(i1 %b) {
+entry:
+ br label %done
+done:
+ %y = phi i32 [ undef, %entry ]
+ ret i32 %y
+}
+
+; We should be able to fold the zexts to the other side of the phi
+; even though there's a constant value input to the phi. This is
+; because we can shrink that constant to the smaller phi type.
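+; A hand-written sketch of the expected rewrite (illustrative names, not
+; autogenerated FileCheck output):
+;   %merged = phi i8 [ 0, %entry ], [ %frombool, %sw ]  ; %frombool = zext i1 %cmp
+; becomes
+;   %merged = phi i1 [ false, %entry ], [ %cmp, %sw ]
+; because the i8 constant 0 shrinks losslessly to the i1 constant false.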
+
+define i1 @PR24766(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but no constants
+
+define i1 @PR24766_no_constants(i8 %x1, i8 %x2, i8 %condition, i1 %another_condition) {
+entry:
+ %frombool0 = zext i1 %another_condition to i8
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ %frombool0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_no_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ %another_condition, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants
+
+define i1 @PR24766_two_constants(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ 1, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ true, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants and two variables
+
+define i1 @PR24766_two_constants_two_var(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ i32 2, label %sw3
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+sw3:
+ %cmp3 = icmp sge i8 %x1, %x2
+ %frombool3 = zext i1 %cmp3 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ], [ 1, %sw3 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants_two_var(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ], [ true, %sw3 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; CHECK-LABEL: phi_allnonzeroconstant
+; CHECK-NOT: phi i32
+; CHECK: ret i1 false
+define i1 @phi_allnonzeroconstant(i1 %c, i32 %a, i32 %b) {
+entry:
+ br i1 %c, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.else: ; preds = %entry
+ call void @dummy()
+
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi i32 [ 1, %if.then ], [ 2, %if.else ]
+ %or = or i32 %x.0, %a
+ %cmp1 = icmp eq i32 %or, 0
+ ret i1 %cmp1
+}
+
+declare void @dummy()
+
+; CHECK-LABEL: @phi_knownnonzero_eq
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 1, %if.then ]
+define i1 @phi_knownnonzero_eq(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.then ], [ %n, %entry ]
+ %cmp1 = icmp eq i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_ne
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 1, %if.then ]
+define i1 @phi_knownnonzero_ne(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.then ], [ %n, %entry ]
+ %cmp1 = icmp ne i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_eq_2
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 2, %if.else ]
+define i1 @phi_knownnonzero_eq_2(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %tobool2 = icmp slt i32 %n, %s
+ br i1 %tobool2, label %if.else, label %if.end
+
+if.else: ; preds = %if.then
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %if.else, %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.else], [ %n, %entry ], [2, %if.then]
+ %cmp1 = icmp eq i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_ne_2
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 2, %if.else ]
+define i1 @phi_knownnonzero_ne_2(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %tobool2 = icmp slt i32 %n, %s
+ br i1 %tobool2, label %if.else, label %if.end
+
+if.else: ; preds = %if.then
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %if.else, %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.else], [ %n, %entry ], [2, %if.then]
+ %cmp1 = icmp ne i32 %a.0, 0
+ ret i1 %cmp1
+}
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll
new file mode 100644
index 00000000000..957e2488b72
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-1.ll
@@ -0,0 +1,488 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt -instcombine -S < %s | FileCheck %s --check-prefixes=CHECK,ANY
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-tvos9.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-watchos2.0 | FileCheck %s --check-prefixes=CHECK,ANY,CHECK-EXP10
+; rdar://7251832
+; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,MSVC,VC32,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=i386-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,MSVC,VC51,VC19,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,MSVC,VC64,CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,MSVC,VC83,VC19,CHECK-NO-EXP10
+
+; NOTE: The readonly attribute on the pow call should be preserved
+; in the cases below where pow is transformed into another function call.
+
+declare float @powf(float, float) nounwind readonly
+declare double @pow(double, double) nounwind readonly
+declare double @llvm.pow.f64(double, double)
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) nounwind readonly
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) nounwind readonly
+
+; Check pow(1.0, x) -> 1.0.
+
+define float @test_simplify1(float %x) {
+; CHECK-LABEL: @test_simplify1(
+; ANY-NEXT: ret float 1.000000e+00
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float 1.000000e+00, float [[X:%.*]])
+; VC32-NEXT: ret float [[POW]]
+; VC64-NEXT: ret float 1.000000e+00
+;
+ %retval = call float @powf(float 1.0, float %x)
+ ret float %retval
+}
+
+define <2 x float> @test_simplify1v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify1v(
+; ANY-NEXT: ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> [[X:%.*]])
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.0, float 1.0>, <2 x float> %x)
+ ret <2 x float> %retval
+}
+
+define double @test_simplify2(double %x) {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %retval = call double @pow(double 1.0, double %x)
+ ret double %retval
+}
+
+define <2 x double> @test_simplify2v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify2v(
+; ANY-NEXT: ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> [[X:%.*]])
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.0, double 1.0>, <2 x double> %x)
+ ret <2 x double> %retval
+}
+
+; Check pow(2.0 ** n, x) -> exp2(n * x).
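+; Two worked instances of the rule (informal, matching the tests below):
+;   pow(0.25, x) = pow(2^-2, x) = exp2(-2 * x)   (@test_simplify3n)
+;   pow(8.0,  x) = pow(2^3,  x) = exp2(3 * x)    (@test_simplify4n)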
+
+define float @test_simplify3(float %x) {
+; CHECK-LABEL: @test_simplify3(
+; ANY-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[X:%.*]])
+; ANY-NEXT: ret float [[EXP2F]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float 2.000000e+00, float [[X:%.*]])
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float 2.000000e+00, float [[X:%.*]])
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[POW:%.*]] = call float @powf(float 2.000000e+00, float [[X:%.*]])
+; VC64-NEXT: ret float [[POW]]
+; VC83-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[X:%.*]])
+; VC83-NEXT: ret float [[EXP2F]]
+;
+ %retval = call float @powf(float 2.0, float %x)
+ ret float %retval
+}
+
+define double @test_simplify3n(double %x) {
+; CHECK-LABEL: @test_simplify3n(
+; ANY-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], -2.000000e+00
+; ANY-NEXT: [[EXP2:%.*]] = call double @exp2(double [[MUL]])
+; ANY-NEXT: ret double [[EXP2]]
+; VC19-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], -2.000000e+00
+; VC19-NEXT: [[EXP2:%.*]] = call double @exp2(double [[MUL]])
+; VC19-NEXT: ret double [[EXP2]]
+; VC32-NEXT: [[POW:%.*]] = call double @pow(double 2.500000e-01, double [[X:%.*]])
+; VC32-NEXT: ret double [[POW]]
+; VC64-NEXT: [[POW:%.*]] = call double @pow(double 2.500000e-01, double [[X:%.*]])
+; VC64-NEXT: ret double [[POW]]
+;
+ %retval = call double @pow(double 0.25, double %x)
+ ret double %retval
+}
+
+define <2 x float> @test_simplify3v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify3v(
+; ANY-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[X:%.*]])
+; ANY-NEXT: ret <2 x float> [[EXP2]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[X:%.*]])
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.0, float 2.0>, <2 x float> %x)
+ ret <2 x float> %retval
+}
+
+define <2 x double> @test_simplify3vn(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify3vn(
+; ANY-NEXT: [[MUL:%.*]] = fmul <2 x double> [[X:%.*]], <double 2.000000e+00, double 2.000000e+00>
+; ANY-NEXT: [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]])
+; ANY-NEXT: ret <2 x double> [[EXP2]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.000000e+00, double 4.000000e+00>, <2 x double> [[X:%.*]])
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.0, double 4.0>, <2 x double> %x)
+ ret <2 x double> %retval
+}
+
+define double @test_simplify4(double %x) {
+; CHECK-LABEL: @test_simplify4(
+; ANY-NEXT: [[EXP2:%.*]] = call double @exp2(double [[X:%.*]])
+; ANY-NEXT: ret double [[EXP2]]
+; VC19-NEXT: [[EXP2:%.*]] = call double @exp2(double [[X:%.*]])
+; VC19-NEXT: ret double [[EXP2]]
+; VC32-NEXT: [[POW:%.*]] = call double @pow(double 2.000000e+00, double [[X:%.*]])
+; VC32-NEXT: ret double [[POW]]
+; VC64-NEXT: [[POW:%.*]] = call double @pow(double 2.000000e+00, double [[X:%.*]])
+; VC64-NEXT: ret double [[POW]]
+;
+ %retval = call double @pow(double 2.0, double %x)
+ ret double %retval
+}
+
+define float @test_simplify4n(float %x) {
+; CHECK-LABEL: @test_simplify4n(
+; ANY-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], 3.000000e+00
+; ANY-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[MUL]])
+; ANY-NEXT: ret float [[EXP2F]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float 8.000000e+00, float [[X:%.*]])
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float 8.000000e+00, float [[X:%.*]])
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[POW:%.*]] = call float @powf(float 8.000000e+00, float [[X:%.*]])
+; VC64-NEXT: ret float [[POW]]
+; VC83-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], 3.000000e+00
+; VC83-NEXT: [[EXP2F:%.*]] = call float @exp2f(float [[MUL]])
+; VC83-NEXT: ret float [[EXP2F]]
+;
+ %retval = call float @powf(float 8.0, float %x)
+ ret float %retval
+}
+
+define <2 x double> @test_simplify4v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify4v(
+; ANY-NEXT: [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[X:%.*]])
+; ANY-NEXT: ret <2 x double> [[EXP2]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[X:%.*]])
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.0, double 2.0>, <2 x double> %x)
+ ret <2 x double> %retval
+}
+
+define <2 x float> @test_simplify4vn(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify4vn(
+; ANY-NEXT: [[MUL:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
+; ANY-NEXT: [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]])
+; ANY-NEXT: ret <2 x float> [[EXP2]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> [[X:%.*]])
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 0.5, float 0.5>, <2 x float> %x)
+ ret <2 x float> %retval
+}
+
+; Check pow(x, 0.0) -> 1.0.
+
+define float @test_simplify5(float %x) {
+; CHECK-LABEL: @test_simplify5(
+; ANY-NEXT: ret float 1.000000e+00
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 0.000000e+00)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 0.000000e+00)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: ret float 1.000000e+00
+; VC83-NEXT: ret float 1.000000e+00
+;
+ %retval = call float @powf(float %x, float 0.0)
+ ret float %retval
+}
+
+define <2 x float> @test_simplify5v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify5v(
+; ANY-NEXT: ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> zeroinitializer)
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
+ ret <2 x float> %retval
+}
+
+define double @test_simplify6(double %x) {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %retval = call double @pow(double %x, double 0.0)
+ ret double %retval
+}
+
+define <2 x double> @test_simplify6v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify6v(
+; ANY-NEXT: ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> zeroinitializer)
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0.0, double 0.0>)
+ ret <2 x double> %retval
+}
+
+; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
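+; Informal rationale for the sequence checked below: under IEEE-754 pow
+; semantics, pow(-0.0, 0.5) is +0.0 while sqrt(-0.0) is -0.0, hence the fabs;
+; and pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN, hence the explicit
+; compare against -infinity followed by a select.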
+
+define float @test_simplify7(float %x) {
+; CHECK-LABEL: @test_simplify7(
+; ANY-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]])
+; ANY-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]])
+; ANY-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000
+; ANY-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]]
+; ANY-NEXT: ret float [[TMP1]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 5.000000e-01)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]])
+; VC64-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]])
+; VC64-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000
+; VC64-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]]
+; VC64-NEXT: ret float [[TMP1]]
+; VC83-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]])
+; VC83-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]])
+; VC83-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000
+; VC83-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], float 0x7FF0000000000000, float [[ABS]]
+; VC83-NEXT: ret float [[TMP1]]
+;
+ %retval = call float @powf(float %x, float 0.5)
+ ret float %retval
+}
+
+define double @test_simplify8(double %x) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %retval = call double @pow(double %x, double 0.5)
+ ret double %retval
+}
+
+; Check pow(-infinity, 0.5) -> +infinity.
+
+define float @test_simplify9(float %x) {
+; CHECK-LABEL: @test_simplify9(
+; ANY-NEXT: ret float 0x7FF0000000000000
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float 0xFFF0000000000000, float 5.000000e-01)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: ret float 0x7FF0000000000000
+; VC83-NEXT: ret float 0x7FF0000000000000
+;
+ %retval = call float @powf(float 0xFFF0000000000000, float 0.5)
+ ret float %retval
+}
+
+define double @test_simplify10(double %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: ret double 0x7FF0000000000000
+;
+ %retval = call double @pow(double 0xFFF0000000000000, double 0.5)
+ ret double %retval
+}
+
+; Check pow(x, 1.0) -> x.
+
+define float @test_simplify11(float %x) {
+; CHECK-LABEL: @test_simplify11(
+; ANY-NEXT: ret float [[X:%.*]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 1.000000e+00)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 1.000000e+00)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: ret float [[X:%.*]]
+; VC83-NEXT: ret float [[X:%.*]]
+;
+ %retval = call float @powf(float %x, float 1.0)
+ ret float %retval
+}
+
+define <2 x float> @test_simplify11v(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify11v(
+; ANY-NEXT: ret <2 x float> [[X:%.*]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+ ret <2 x float> %retval
+}
+
+define double @test_simplify12(double %x) {
+; CHECK-LABEL: @test_simplify12(
+; CHECK-NEXT: ret double [[X:%.*]]
+;
+ %retval = call double @pow(double %x, double 1.0)
+ ret double %retval
+}
+
+define <2 x double> @test_simplify12v(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify12v(
+; ANY-NEXT: ret <2 x double> [[X:%.*]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 1.000000e+00, double 1.000000e+00>)
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 1.0, double 1.0>)
+ ret <2 x double> %retval
+}
+
+; Check pow(x, 2.0) -> x*x.
+
+define float @pow2_strict(float %x) {
+; CHECK-LABEL: @pow2_strict(
+; ANY-NEXT: [[SQUARE:%.*]] = fmul float [[X:%.*]], [[X]]
+; ANY-NEXT: ret float [[SQUARE]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 2.000000e+00)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float 2.000000e+00)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[SQUARE:%.*]] = fmul float [[X:%.*]], [[X]]
+; VC64-NEXT: ret float [[SQUARE]]
+; VC83-NEXT: [[SQUARE:%.*]] = fmul float [[X:%.*]], [[X]]
+; VC83-NEXT: ret float [[SQUARE]]
+;
+ %r = call float @powf(float %x, float 2.0)
+ ret float %r
+}
+
+define <2 x float> @pow2_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow2_strictv(
+; ANY-NEXT: [[SQUARE:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
+; ANY-NEXT: ret <2 x float> [[SQUARE]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 2.000000e+00, float 2.000000e+00>)
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+ ret <2 x float> %r
+}
+
+define double @pow2_double_strict(double %x) {
+; CHECK-LABEL: @pow2_double_strict(
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul double [[X:%.*]], [[X]]
+; CHECK-NEXT: ret double [[SQUARE]]
+;
+ %r = call double @pow(double %x, double 2.0)
+ ret double %r
+}
+
+define <2 x double> @pow2_double_strictv(<2 x double> %x) {
+; CHECK-LABEL: @pow2_double_strictv(
+; ANY-NEXT: [[SQUARE:%.*]] = fmul <2 x double> [[X:%.*]], [[X]]
+; ANY-NEXT: ret <2 x double> [[SQUARE]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 2.000000e+00, double 2.000000e+00>)
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %r = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 2.0, double 2.0>)
+ ret <2 x double> %r
+}
+
+; Don't drop the FMF - PR35601 ( https://bugs.llvm.org/show_bug.cgi?id=35601 )
+
+define float @pow2_fast(float %x) {
+; CHECK-LABEL: @pow2_fast(
+; ANY-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; ANY-NEXT: ret float [[SQUARE]]
+; VC32-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 2.000000e+00)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 2.000000e+00)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; VC64-NEXT: ret float [[SQUARE]]
+; VC83-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; VC83-NEXT: ret float [[SQUARE]]
+;
+ %r = call fast float @powf(float %x, float 2.0)
+ ret float %r
+}
+
+; Check pow(x, -1.0) -> 1.0/x.
+
+define float @pow_neg1_strict(float %x) {
+; CHECK-LABEL: @pow_neg1_strict(
+; ANY-NEXT: [[RECIPROCAL:%.*]] = fdiv float 1.000000e+00, [[X:%.*]]
+; ANY-NEXT: ret float [[RECIPROCAL]]
+; VC32-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float -1.000000e+00)
+; VC32-NEXT: ret float [[POW]]
+; VC51-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float -1.000000e+00)
+; VC51-NEXT: ret float [[POW]]
+; VC64-NEXT: [[RECIPROCAL:%.*]] = fdiv float 1.000000e+00, [[X:%.*]]
+; VC64-NEXT: ret float [[RECIPROCAL]]
+; VC83-NEXT: [[RECIPROCAL:%.*]] = fdiv float 1.000000e+00, [[X:%.*]]
+; VC83-NEXT: ret float [[RECIPROCAL]]
+;
+ %r = call float @powf(float %x, float -1.0)
+ ret float %r
+}
+
+define <2 x float> @pow_neg1_strictv(<2 x float> %x) {
+; CHECK-LABEL: @pow_neg1_strictv(
+; ANY-NEXT: [[RECIPROCAL:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X:%.*]]
+; ANY-NEXT: ret <2 x float> [[RECIPROCAL]]
+; MSVC-NEXT: [[POW:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[X:%.*]], <2 x float> <float -1.000000e+00, float -1.000000e+00>)
+; MSVC-NEXT: ret <2 x float> [[POW]]
+;
+ %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+ ret <2 x float> %r
+}
+
+define double @pow_neg1_double_fast(double %x) {
+; CHECK-LABEL: @pow_neg1_double_fast(
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[X:%.*]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %r = call fast double @pow(double %x, double -1.0)
+ ret double %r
+}
+
+define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) {
+; CHECK-LABEL: @pow_neg1_double_fastv(
+; ANY-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[X:%.*]]
+; ANY-NEXT: ret <2 x double> [[RECIPROCAL]]
+; MSVC-NEXT: [[POW:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -1.000000e+00, double -1.000000e+00>)
+; MSVC-NEXT: ret <2 x double> [[POW]]
+;
+ %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.0, double -1.0>)
+ ret <2 x double> %r
+}
+
+define double @test_simplify17(double %x) {
+; CHECK-LABEL: @test_simplify17(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %retval = call double @llvm.pow.f64(double %x, double 0.5)
+ ret double %retval
+}
+
+; Check pow(10.0, x) -> __exp10(x) on OS X 10.9+ and iOS 7.0+.
+
+define float @test_simplify18(float %x) {
+; CHECK-LABEL: @test_simplify18(
+; CHECK-EXP10-NEXT: [[__EXP10F:%.*]] = call float @__exp10f(float [[X:%.*]])
+; CHECK-EXP10-NEXT: ret float [[__EXP10F]]
+; CHECK-NO-EXP10-NEXT: [[RETVAL:%.*]] = call float @powf(float 1.000000e+01, float [[X:%.*]])
+; CHECK-NO-EXP10-NEXT: ret float [[RETVAL]]
+;
+ %retval = call float @powf(float 10.0, float %x)
+ ret float %retval
+}
+
+define double @test_simplify19(double %x) {
+; CHECK-LABEL: @test_simplify19(
+; CHECK-EXP10-NEXT: [[__EXP10:%.*]] = call double @__exp10(double [[X:%.*]])
+; CHECK-EXP10-NEXT: ret double [[__EXP10]]
+; CHECK-NO-EXP10-NEXT: [[RETVAL:%.*]] = call double @pow(double 1.000000e+01, double [[X:%.*]])
+; CHECK-NO-EXP10-NEXT: ret double [[RETVAL]]
+;
+ %retval = call double @pow(double 10.0, double %x)
+ ret double %retval
+}
diff --git a/llvm/test/Transforms/InstCombine/pow-2.ll b/llvm/test/Transforms/InstCombine/pow-2.ll
new file mode 100644
index 00000000000..bd5178a3795
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-2.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare float @pow(double, double)
+
+; Check that pow functions with the wrong prototype aren't simplified.
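+; (The @pow declared above returns float rather than double, so it does not
+; match the standard double pow(double, double) prototype and is left alone.)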
+
+define float @test_no_simplify1(double %x) {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[RETVAL:%.*]] = call float @pow(double 1.000000e+00, double [[X:%.*]])
+; CHECK-NEXT: ret float [[RETVAL]]
+;
+ %retval = call float @pow(double 1.0, double %x)
+ ret float %retval
+}
+
diff --git a/llvm/test/Transforms/InstCombine/pow-3.ll b/llvm/test/Transforms/InstCombine/pow-3.ll
new file mode 100644
index 00000000000..d0edd465875
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-3.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that pow() is not simplified when library call simplification is disabled.
+;
+; RUN: opt < %s -disable-simplify-libcalls -instcombine -S | FileCheck %s
+
+declare double @llvm.pow.f64(double, double)
+declare double @pow(double, double)
+
+define double @test_simplify_unavailable1(double %x) {
+; CHECK-LABEL: @test_simplify_unavailable1(
+; CHECK-NEXT: [[RETVAL:%.*]] = call double @llvm.pow.f64(double [[X:%.*]], double 5.000000e-01)
+; CHECK-NEXT: ret double [[RETVAL]]
+;
+ %retval = call double @llvm.pow.f64(double %x, double 0.5)
+ ret double %retval
+}
+
+; Shrinking is disabled too.
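+; (That is, the usual shrinking of pow on fpext'ed floats plus fptrunc into a
+; single powf call does not happen here; the checks verify that the double
+; call and the fptrunc survive.)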
+
+define float @test_simplify_unavailable2(float %f, float %g) {
+; CHECK-LABEL: @test_simplify_unavailable2(
+; CHECK-NEXT: [[DF:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[DG:%.*]] = fpext float [[G:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @pow(double [[DF]], double [[DG]])
+; CHECK-NEXT: [[FR:%.*]] = fptrunc double [[CALL]] to float
+; CHECK-NEXT: ret float [[FR]]
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @pow(double %df, double %dg)
+ %fr = fptrunc double %call to float
+ ret float %fr
+}
+
+; Shrinking is disabled for the intrinsic too.
+
+define float @test_simplify_unavailable3(float %f, float %g) {
+; CHECK-LABEL: @test_simplify_unavailable3(
+; CHECK-NEXT: [[DF:%.*]] = fpext float [[F:%.*]] to double
+; CHECK-NEXT: [[DG:%.*]] = fpext float [[G:%.*]] to double
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @llvm.pow.f64(double [[DF]], double [[DG]])
+; CHECK-NEXT: [[FR:%.*]] = fptrunc double [[CALL]] to float
+; CHECK-NEXT: ret float [[FR]]
+;
+ %df = fpext float %f to double
+ %dg = fpext float %g to double
+ %call = call fast double @llvm.pow.f64(double %df, double %dg)
+ %fr = fptrunc double %call to float
+ ret float %fr
+}
diff --git a/llvm/test/Transforms/InstCombine/pow-4.ll b/llvm/test/Transforms/InstCombine/pow-4.ll
new file mode 100644
index 00000000000..53d53b84569
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-4.ll
@@ -0,0 +1,225 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare double @pow(double, double)
+
+; pow(x, 3.0)
+define double @test_simplify_3(double %x) {
+; CHECK-LABEL: @test_simplify_3(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[X]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
+ ret double %1
+}
+
+; powf(x, 4.0)
+define float @test_simplify_4f(float %x) {
+; CHECK-LABEL: @test_simplify_4f(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
+ ret float %1
+}
+
+; pow(x, 4.0)
+define double @test_simplify_4(double %x) {
+; CHECK-LABEL: @test_simplify_4(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
+ ret double %1
+}
+
+; powf(x, <15.0, 15.0>)
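+; The expected expansion is a repeated-squaring chain; informally, the TMP
+; values below are TMP1 = x^2, TMP2 = x^3, TMP3 = x^6, TMP4 = x^12, and
+; TMP5 = TMP2 * TMP4 = x^15.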
+define <2 x float> @test_simplify_15(<2 x float> %x) {
+; CHECK-LABEL: @test_simplify_15(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret <2 x float> [[TMP5]]
+;
+ %1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.500000e+01, float 1.500000e+01>)
+ ret <2 x float> %1
+}
+
+; pow(x, -7.0)
+define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify_neg_7(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], [[X]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
+; CHECK-NEXT: ret <2 x double> [[TMP5]]
+;
+ %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -7.000000e+00, double -7.000000e+00>)
+ ret <2 x double> %1
+}
+
+; powf(x, -19.0)
+define float @test_simplify_neg_19(float %x) {
+; CHECK-LABEL: @test_simplify_neg_19(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP5]], [[X]]
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
+; CHECK-NEXT: ret float [[TMP7]]
+;
+ %1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01)
+ ret float %1
+}
+
+; pow(x, 11.23)
+define double @test_simplify_11_23(double %x) {
+; CHECK-LABEL: @test_simplify_11_23(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 1.123000e+01)
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double 1.123000e+01)
+ ret double %1
+}
+
+; powf(x, 32.0)
+define float @test_simplify_32(float %x) {
+; CHECK-LABEL: @test_simplify_32(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[TMP4]]
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01)
+ ret float %1
+}
+
+; pow(x, 33.0)
+define double @test_simplify_33(double %x) {
+; CHECK-LABEL: @test_simplify_33(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 3.300000e+01)
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
+ ret double %1
+}
+
+; pow(x, 16.5) with double
+define double @test_simplify_16_5(double %x) {
+; CHECK-LABEL: @test_simplify_16_5(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double 1.650000e+01)
+ ret double %1
+}
+
+; pow(x, -16.5) with double
+define double @test_simplify_neg_16_5(double %x) {
+; CHECK-LABEL: @test_simplify_neg_16_5(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %1 = call fast double @llvm.pow.f64(double %x, double -1.650000e+01)
+ ret double %1
+}
+
+; pow(x, 16.5) with double
+define double @test_simplify_16_5_libcall(double %x) {
+; CHECK-LABEL: @test_simplify_16_5_libcall(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = call fast double @pow(double %x, double 1.650000e+01)
+ ret double %1
+}
+
+; pow(x, -16.5) with double
+define double @test_simplify_neg_16_5_libcall(double %x) {
+; CHECK-LABEL: @test_simplify_neg_16_5_libcall(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %1 = call fast double @pow(double %x, double -1.650000e+01)
+ ret double %1
+}
+
+; pow(x, -4.5) with float
+define float @test_simplify_neg_8_5(float %x) {
+; CHECK-LABEL: @test_simplify_neg_8_5(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[SQRT]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP2]]
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+;
+ %1 = call fast float @llvm.pow.f32(float %x, float -0.450000e+01)
+ ret float %1
+}
+
+; pow(x, 7.5) with <2 x double>
+define <2 x double> @test_simplify_7_5(<2 x double> %x) {
+; CHECK-LABEL: @test_simplify_7_5(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP3]], [[SQRT]]
+; CHECK-NEXT: ret <2 x double> [[TMP4]]
+;
+ %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.500000e+00, double 7.500000e+00>)
+ ret <2 x double> %1
+}
+
+; pow(x, 3.5) with <4 x float>
+define <4 x float> @test_simplify_3_5(<4 x float> %x) {
+; CHECK-LABEL: @test_simplify_3_5(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[X]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[X]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[SQRT]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
+;
+ %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 3.500000e+00, float 3.500000e+00, float 3.500000e+00, float 3.500000e+00>)
+ ret <4 x float> %1
+}
+
diff --git a/llvm/test/Transforms/InstCombine/pow-cbrt.ll b/llvm/test/Transforms/InstCombine/pow-cbrt.ll
new file mode 100644
index 00000000000..00fa510b04e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-cbrt.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
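+; The exponents below are the closest double and float approximations of
+; +/-(1/3): 0x3FD5555555555555 for double and 0x3FD5555560000000 for float
+; (printed in double hex). The checks confirm that these cbrt-like pow calls
+; are currently left untouched.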
+define double @pow_intrinsic_third_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_third_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_third_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_third_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_third_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_third_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call afn float @llvm.pow.f32(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_libcall_third_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0x3FD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @pow(double %x, double 0x3fd5555555555555)
+ ret double %pow
+}
+
+define float @powf_libcall_third_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_third_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0x3FD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @powf(float %x, float 0x3fd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+define double @pow_intrinsic_negthird_approx(double %x) {
+; CHECK-LABEL: @pow_intrinsic_negthird_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn double @llvm.pow.f64(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call afn double @llvm.pow.f64(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_intrinsic_negthird_approx(float %x) {
+; CHECK-LABEL: @powf_intrinsic_negthird_approx(
+; CHECK-NEXT: [[POW:%.*]] = call afn float @llvm.pow.f32(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call afn float @llvm.pow.f32(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+define double @pow_libcall_negthird_fast(double %x) {
+; CHECK-LABEL: @pow_libcall_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast double @pow(double [[X:%.*]], double 0xBFD5555555555555)
+; CHECK-NEXT: ret double [[POW]]
+;
+ %pow = call fast double @pow(double %x, double 0xbfd5555555555555)
+ ret double %pow
+}
+
+define float @powf_libcall_negthird_fast(float %x) {
+; CHECK-LABEL: @powf_libcall_negthird_fast(
+; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float [[X:%.*]], float 0xBFD5555560000000)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = call fast float @powf(float %x, float 0xbfd5555560000000)
+ ret float %pow
+}
+
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+declare double @pow(double, double)
+declare float @powf(float, float)
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/Transforms/InstCombine/pow-exp-nofastmath.ll b/llvm/test/Transforms/InstCombine/pow-exp-nofastmath.ll
new file mode 100644
index 00000000000..ef9b3a6a78d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-exp-nofastmath.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) {
+; CHECK-LABEL: @mypow(
+; CHECK-NEXT: [[CALL:%.*]] = call double @exp(double [[X:%.*]])
+; CHECK-NEXT: [[POW:%.*]] = call double @llvm.pow.f64(double [[CALL]], double [[Y:%.*]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %call = call double @exp(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+declare double @exp(double) #1
+declare double @llvm.pow.f64(double, double)
diff --git a/llvm/test/Transforms/InstCombine/pow-exp.ll b/llvm/test/Transforms/InstCombine/pow-exp.ll
new file mode 100644
index 00000000000..751774fb9c8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-exp.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
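+; These tests cover folding pow(exp(x), y) and pow(exp2(x), y) into
+; exp(x * y) and exp2(x * y) respectively; informally, (e^x)^y = e^(x*y).
+; The fold is only expected when the calls carry fast-math flags (see
+; @powl_expl_not_fast below).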
+define float @powf_expf(float %x, float %y) {
+; CHECK-LABEL: @powf_expf(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP:%.*]] = call fast float @llvm.exp.f32(float [[MUL]])
+; CHECK-NEXT: ret float [[EXP]]
+;
+ %call = call fast float @expf(float %x) nounwind readnone
+ %pow = call fast float @llvm.pow.f32(float %call, float %y)
+ ret float %pow
+}
+
+define float @powf_expf_libcall(float %x, float %y) {
+; CHECK-LABEL: @powf_expf_libcall(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXPF:%.*]] = call fast float @expf(float [[MUL]])
+; CHECK-NEXT: ret float [[EXPF]]
+;
+ %call = call fast float @expf(float %x)
+ %pow = call fast float @powf(float %call, float %y)
+ ret float %pow
+}
+
+define double @pow_exp(double %x, double %y) {
+; CHECK-LABEL: @pow_exp(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP:%.*]] = call fast double @llvm.exp.f64(double [[MUL]])
+; CHECK-NEXT: ret double [[EXP]]
+;
+ %call = call fast double @exp(double %x) nounwind readnone
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+define double @pow_exp_not_intrinsic(double %x, double %y) {
+; CHECK-LABEL: @pow_exp_not_intrinsic(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP:%.*]] = call fast double @llvm.exp.f64(double [[MUL]])
+; CHECK-NEXT: ret double [[EXP]]
+;
+ %call = call fast double @exp(double %x) nounwind readnone
+ %pow = call fast double @pow(double %call, double %y) nounwind readnone
+ ret double %pow
+}
+
+define fp128 @powl_expl(fp128 %x, fp128 %y) {
+; CHECK-LABEL: @powl_expl(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast fp128 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP:%.*]] = call fast fp128 @llvm.exp.f128(fp128 [[MUL]])
+; CHECK-NEXT: ret fp128 [[EXP]]
+;
+ %call = call fast fp128 @expl(fp128 %x) nounwind readnone
+ %pow = call fast fp128 @llvm.pow.f128(fp128 %call, fp128 %y)
+ ret fp128 %pow
+}
+
+define fp128 @powl_expl_not_fast(fp128 %x, fp128 %y) {
+; CHECK-LABEL: @powl_expl_not_fast(
+; CHECK-NEXT: [[CALL:%.*]] = call fp128 @expl(fp128 [[X:%.*]])
+; CHECK-NEXT: [[POW:%.*]] = call fast fp128 @llvm.pow.f128(fp128 [[CALL]], fp128 [[Y:%.*]])
+; CHECK-NEXT: ret fp128 [[POW]]
+;
+ %call = call fp128 @expl(fp128 %x)
+ %pow = call fast fp128 @llvm.pow.f128(fp128 %call, fp128 %y)
+ ret fp128 %pow
+}
+
+define float @powf_exp2f(float %x, float %y) {
+; CHECK-LABEL: @powf_exp2f(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT: ret float [[EXP2]]
+;
+ %call = call fast float @exp2f(float %x) nounwind readnone
+ %pow = call fast float @llvm.pow.f32(float %call, float %y)
+ ret float %pow
+}
+
+define float @powf_exp2f_not_intrinsic(float %x, float %y) {
+; CHECK-LABEL: @powf_exp2f_not_intrinsic(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[MUL]])
+; CHECK-NEXT: ret float [[EXP2]]
+;
+ %call = call fast float @exp2f(float %x) nounwind readnone
+ %pow = call fast float @powf(float %call, float %y) nounwind readnone
+ ret float %pow
+}
+
+define double @pow_exp2(double %x, double %y) {
+; CHECK-LABEL: @pow_exp2(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP2:%.*]] = call fast double @llvm.exp2.f64(double [[MUL]])
+; CHECK-NEXT: ret double [[EXP2]]
+;
+ %call = call fast double @exp2(double %x) nounwind readnone
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+define double @pow_exp2_libcall(double %x, double %y) {
+; CHECK-LABEL: @pow_exp2_libcall(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP2:%.*]] = call fast double @exp2(double [[MUL]])
+; CHECK-NEXT: ret double [[EXP2]]
+;
+ %call = call fast double @exp2(double %x)
+ %pow = call fast double @pow(double %call, double %y)
+ ret double %pow
+}
+
+define fp128 @powl_exp2l(fp128 %x, fp128 %y) {
+; CHECK-LABEL: @powl_exp2l(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast fp128 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP2:%.*]] = call fast fp128 @llvm.exp2.f128(fp128 [[MUL]])
+; CHECK-NEXT: ret fp128 [[EXP2]]
+;
+ %call = call fast fp128 @exp2l(fp128 %x) nounwind readnone
+ %pow = call fast fp128 @llvm.pow.f128(fp128 %call, fp128 %y)
+ ret fp128 %pow
+}
+
+define fp128 @powl_exp2l_not_fast(fp128 %x, fp128 %y) {
+; CHECK-LABEL: @powl_exp2l_not_fast(
+; CHECK-NEXT: [[CALL:%.*]] = call fp128 @exp2l(fp128 [[X:%.*]])
+; CHECK-NEXT: [[POW:%.*]] = call fast fp128 @llvm.pow.f128(fp128 [[CALL]], fp128 [[Y:%.*]])
+; CHECK-NEXT: ret fp128 [[POW]]
+;
+ %call = call fp128 @exp2l(fp128 %x)
+ %pow = call fast fp128 @llvm.pow.f128(fp128 %call, fp128 %y)
+ ret fp128 %pow
+}
+
+; TODO: exp10() is not widely enabled by many targets yet.
+
+define float @powf_exp10f(float %x, float %y) {
+; CHECK-LABEL: @powf_exp10f(
+; CHECK-NEXT: [[CALL:%.*]] = call fast float @exp10f(float [[X:%.*]]) #1
+; CHECK-NEXT: [[POW:%.*]] = call fast float @llvm.pow.f32(float [[CALL]], float [[Y:%.*]])
+; CHECK-NEXT: ret float [[POW]]
+;
+ %call = call fast float @exp10f(float %x) nounwind readnone
+ %pow = call fast float @llvm.pow.f32(float %call, float %y)
+ ret float %pow
+}
+
+define double @pow_exp10(double %x, double %y) {
+; CHECK-LABEL: @pow_exp10(
+; CHECK-NEXT: [[CALL:%.*]] = call fast double @exp10(double [[X:%.*]]) #1
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[CALL]], double [[Y:%.*]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %call = call fast double @exp10(double %x) nounwind readnone
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+define fp128 @pow_exp10l(fp128 %x, fp128 %y) {
+; CHECK-LABEL: @pow_exp10l(
+; CHECK-NEXT: [[CALL:%.*]] = call fast fp128 @exp10l(fp128 [[X:%.*]]) #1
+; CHECK-NEXT: [[POW:%.*]] = call fast fp128 @llvm.pow.f128(fp128 [[CALL]], fp128 [[Y:%.*]])
+; CHECK-NEXT: ret fp128 [[POW]]
+;
+ %call = call fast fp128 @exp10l(fp128 %x) nounwind readnone
+ %pow = call fast fp128 @llvm.pow.f128(fp128 %call, fp128 %y)
+ ret fp128 %pow
+}
+
+define float @reuse_fast(float %x, float %y, float * %p) {
+; CHECK-LABEL: @reuse_fast(
+; CHECK-NEXT: [[EXP:%.*]] = call fast float @expf(float [[X:%.*]])
+; CHECK-NEXT: [[POW:%.*]] = call fast float @powf(float [[EXP]], float [[Y:%.*]])
+; CHECK-NEXT: store float [[EXP]], float* [[P:%.*]], align 4
+; CHECK-NEXT: ret float [[POW]]
+;
+ %exp = call fast float @expf(float %x)
+ %pow = call fast float @powf(float %exp, float %y)
+ store float %exp, float *%p, align 4
+ ret float %pow
+}
+
+define fp128 @reuse_libcall(fp128 %x, fp128 %y, fp128 * %p) {
+; CHECK-LABEL: @reuse_libcall(
+; CHECK-NEXT: [[EXP:%.*]] = call fp128 @expl(fp128 [[X:%.*]])
+; CHECK-NEXT: [[POW:%.*]] = call fp128 @powl(fp128 [[EXP]], fp128 [[Y:%.*]])
+; CHECK-NEXT: store fp128 [[EXP]], fp128* [[P:%.*]], align 16
+; CHECK-NEXT: ret fp128 [[POW]]
+;
+ %exp = call fp128 @expl(fp128 %x)
+ %pow = call fp128 @powl(fp128 %exp, fp128 %y)
+ store fp128 %exp, fp128 *%p, align 16
+ ret fp128 %pow
+}
+
+define double @function_pointer(double ()* %fptr, double %p1) {
+; CHECK-LABEL: @function_pointer(
+; CHECK-NEXT: [[CALL1:%.*]] = call fast double [[FPTR:%.*]]()
+; CHECK-NEXT: [[POW:%.*]] = call fast double @llvm.pow.f64(double [[CALL1]], double [[P1:%.*]])
+; CHECK-NEXT: ret double [[POW]]
+;
+ %call1 = call fast double %fptr()
+ %pow = call fast double @llvm.pow.f64(double %call1, double %p1)
+ ret double %pow
+}
+
+declare float @expf(float)
+declare double @exp(double)
+declare fp128 @expl(fp128)
+declare float @exp2f(float)
+declare double @exp2(double)
+declare fp128 @exp2l(fp128)
+declare float @exp10f(float)
+declare double @exp10(double)
+declare fp128 @exp10l(fp128)
+declare float @powf(float, float)
+declare double @pow(double, double)
+declare fp128 @powl(fp128, fp128)
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
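Aside (illustrative only, not part of the patch): the exp-folding tests above rely on the identity (e^x)^y = e^(x*y), which the combiner may apply only when both calls carry fast-math flags, since the rewrite can change rounding and special-value behavior. A minimal C sketch of the two shapes, assuming fast-math-style reassociation is acceptable; the helper names are made up for illustration:

#include <math.h>

/* Shape the tests start from: pow applied to the result of exp. */
double pow_of_exp(double x, double y) {
    return pow(exp(x), y);
}

/* Shape expected after the fold when both calls are `fast`:
 * one multiply feeding a single exp, since (e^x)^y == e^(x*y). */
double exp_of_product(double x, double y) {
    return exp(x * y);
}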
diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
new file mode 100644
index 00000000000..9fcca83c21f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check the libcall and the intrinsic for each case with differing FMF.
+
+; The transform to sqrt is allowed as long as we deal with -0.0 and -INF.
+
+define double @pow_libcall_half_no_FMF(double %x) {
+; CHECK-LABEL: @pow_libcall_half_no_FMF(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %pow = call double @pow(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+define double @pow_intrinsic_half_no_FMF(double %x) {
+; CHECK-LABEL: @pow_intrinsic_half_no_FMF(
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %pow = call double @llvm.pow.f64(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+; This makes no difference, but FMF are propagated.
+
+define double @pow_libcall_half_approx(double %x) {
+; CHECK-LABEL: @pow_libcall_half_approx(
+; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call afn double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[ABS]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %pow = call afn double @pow(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) {
+; CHECK-LABEL: @pow_intrinsic_half_approx(
+; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
+; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[ISINF]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[ABS]]
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>)
+ ret <2 x double> %pow
+}
+
+define float @powf_intrinsic_half_fast(float %x) {
+; CHECK-LABEL: @powf_intrinsic_half_fast(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[SQRT]]
+;
+ %pow = call fast float @llvm.pow.f32(float %x, float 5.0e-01)
+ ret float %pow
+}
+
+; If we can disregard INFs, no need for a select.
+
+define double @pow_libcall_half_ninf(double %x) {
+; CHECK-LABEL: @pow_libcall_half_ninf(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call ninf double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: ret double [[ABS]]
+;
+ %pow = call ninf double @pow(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+define <2 x double> @pow_intrinsic_half_ninf(<2 x double> %x) {
+; CHECK-LABEL: @pow_intrinsic_half_ninf(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT: ret <2 x double> [[ABS]]
+;
+ %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>)
+ ret <2 x double> %pow
+}
+
+; If we can disregard -0.0, no need for fabs.
+
+define double @pow_libcall_half_nsz(double %x) {
+; CHECK-LABEL: @pow_libcall_half_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %pow = call nsz double @pow(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+define double @pow_intrinsic_half_nsz(double %x) {
+; CHECK-LABEL: @pow_intrinsic_half_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[ISINF]], double 0x7FF0000000000000, double [[SQRT]]
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %pow = call nsz double @llvm.pow.f64(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+; This is just sqrt.
+
+define float @pow_libcall_half_ninf_nsz(float %x) {
+; CHECK-LABEL: @pow_libcall_half_ninf_nsz(
+; CHECK-NEXT: [[SQRTF:%.*]] = call ninf nsz float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: ret float [[SQRTF]]
+;
+ %pow = call ninf nsz float @powf(float %x, float 5.0e-01)
+ ret float %pow
+}
+
+define double @pow_intrinsic_half_ninf_nsz(double %x) {
+; CHECK-LABEL: @pow_intrinsic_half_ninf_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf nsz double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[SQRT]]
+;
+ %pow = call ninf nsz double @llvm.pow.f64(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+; Overspecified FMF to test propagation to the new op(s).
+
+define float @pow_libcall_half_fast(float %x) {
+; CHECK-LABEL: @pow_libcall_half_fast(
+; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: ret float [[SQRTF]]
+;
+ %pow = call fast float @powf(float %x, float 5.0e-01)
+ ret float %pow
+}
+
+define double @pow_intrinsic_half_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_half_fast(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[SQRT]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double 5.0e-01)
+ ret double %pow
+}
+
+; -0.5 means take the reciprocal.
+
+define float @pow_libcall_neghalf_no_FMF(float %x) {
+; CHECK-LABEL: @pow_libcall_neghalf_no_FMF(
+; CHECK-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv float 1.000000e+00, [[ABS]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]]
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+;
+ %pow = call float @powf(float %x, float -5.0e-01)
+ ret float %pow
+}
+
+define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) {
+; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF(
+; CHECK-NEXT: [[SQRT:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
+; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[ABS_OP]]
+; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]]
+;
+ %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>)
+ ret <2 x double> %pow
+}
+
+; If we can disregard INFs, no need for a select.
+
+define double @pow_libcall_neghalf_ninf(double %x) {
+; CHECK-LABEL: @pow_libcall_neghalf_ninf(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call ninf double @llvm.fabs.f64(double [[SQRT]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf double 1.000000e+00, [[ABS]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %pow = call ninf double @pow(double %x, double -5.0e-01)
+ ret double %pow
+}
+
+define <2 x double> @pow_intrinsic_neghalf_ninf(<2 x double> %x) {
+; CHECK-LABEL: @pow_intrinsic_neghalf_ninf(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call ninf <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
+; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]]
+;
+ %pow = call ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>)
+ ret <2 x double> %pow
+}
+
+; If we can disregard -0.0, no need for fabs.
+
+define double @pow_libcall_neghalf_nsz(double %x) {
+; CHECK-LABEL: @pow_libcall_neghalf_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @sqrt(double [[X:%.*]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[SQRT_OP:%.*]] = fdiv nsz double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], double 0.000000e+00, double [[SQRT_OP]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %pow = call nsz double @pow(double %x, double -5.0e-01)
+ ret double %pow
+}
+
+define double @pow_intrinsic_neghalf_nsz(double %x) {
+; CHECK-LABEL: @pow_intrinsic_neghalf_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call nsz double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp nsz oeq double [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[SQRT_OP:%.*]] = fdiv nsz double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], double 0.000000e+00, double [[SQRT_OP]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %pow = call nsz double @llvm.pow.f64(double %x, double -5.0e-01)
+ ret double %pow
+}
+
+; This is just recip-sqrt.
+
+define double @pow_intrinsic_neghalf_ninf_nsz(double %x) {
+; CHECK-LABEL: @pow_intrinsic_neghalf_ninf_nsz(
+; CHECK-NEXT: [[SQRT:%.*]] = call ninf nsz double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf nsz double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %pow = call ninf nsz double @llvm.pow.f64(double %x, double -5.0e-01)
+ ret double %pow
+}
+
+define float @pow_libcall_neghalf_ninf_nsz(float %x) {
+; CHECK-LABEL: @pow_libcall_neghalf_ninf_nsz(
+; CHECK-NEXT: [[SQRTF:%.*]] = call ninf nsz float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv ninf nsz float 1.000000e+00, [[SQRTF]]
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+;
+ %pow = call ninf nsz float @powf(float %x, float -5.0e-01)
+ ret float %pow
+}
+
+; Overspecified FMF to test propagation to the new op(s).
+
+define float @pow_libcall_neghalf_fast(float %x) {
+; CHECK-LABEL: @pow_libcall_neghalf_fast(
+; CHECK-NEXT: [[SQRTF:%.*]] = call fast float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[SQRTF]]
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+;
+ %pow = call fast float @powf(float %x, float -5.0e-01)
+ ret float %pow
+}
+
+define float @powf_libcall_neghalf_approx(float %x) {
+; CHECK-LABEL: @powf_libcall_neghalf_approx(
+; CHECK-NEXT: [[SQRTF:%.*]] = call afn float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = call afn float @llvm.fabs.f32(float [[SQRTF]])
+; CHECK-NEXT: [[ISINF:%.*]] = fcmp afn oeq float [[X]], 0xFFF0000000000000
+; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv afn float 1.000000e+00, [[ABS]]
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]]
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+;
+ %pow = call afn float @powf(float %x, float -5.0e-01)
+ ret float %pow
+}
+
+define double @pow_intrinsic_neghalf_fast(double %x) {
+; CHECK-LABEL: @pow_intrinsic_neghalf_fast(
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+;
+ %pow = call fast double @llvm.pow.f64(double %x, double -5.0e-01)
+ ret double %pow
+}
+
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0
+declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) #0
+declare double @pow(double, double)
+declare float @powf(float, float)
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
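Aside (illustrative only, not part of the patch): the fabs and the select in the no-FMF expansions above exist because pow and sqrt disagree on two special inputs, -0.0 and -infinity, and only the nsz/ninf flags let the compensation be dropped. A small C check of the two discrepancies, assuming a C99 math library:

#include <math.h>
#include <stdio.h>

int main(void) {
    /* pow(-0.0, 0.5) is +0.0, but sqrt(-0.0) is -0.0, so the expansion
     * needs fabs unless nsz says signed zeros do not matter. */
    printf("pow(-0.0, 0.5) = %g, sqrt(-0.0) = %g\n",
           pow(-0.0, 0.5), sqrt(-0.0));

    /* pow(-inf, 0.5) is +inf, but sqrt(-inf) is NaN, so the expansion
     * needs the x == -inf select unless ninf rules infinities out. */
    printf("pow(-inf, 0.5) = %g, sqrt(-inf) = %g\n",
           pow(-INFINITY, 0.5), sqrt(-INFINITY));
    return 0;
}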
diff --git a/llvm/test/Transforms/InstCombine/pr12251.ll b/llvm/test/Transforms/InstCombine/pr12251.ll
new file mode 100644
index 00000000000..7197bda2e54
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr12251.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define zeroext i1 @_Z3fooPb(i8* nocapture %x) {
+entry:
+ %a = load i8, i8* %x, align 1, !range !0
+ %b = and i8 %a, 1
+ %tobool = icmp ne i8 %b, 0
+ ret i1 %tobool
+}
+
+; CHECK: %a = load i8, i8* %x, align 1, !range !0
+; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 %tobool
+
+!0 = !{i8 0, i8 2}
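Aside (illustrative only, not part of the patch): the !range !{i8 0, i8 2} metadata promises the loaded byte is 0 or 1, which is why the 'and' with 1 is expected to disappear and the compare can test %a directly. The same reasoning spelled out in C, with the range assumption made explicit:

#include <assert.h>
#include <stdbool.h>

/* The caller guarantees *x is 0 or 1, mirroring !range !{i8 0, i8 2}. */
bool foo(const unsigned char *x) {
    unsigned char a = *x;
    assert(a == 0 || a == 1);   /* stand-in for the !range metadata */
    return (a & 1) != 0;        /* equals (a != 0) under that range */
}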
diff --git a/llvm/test/Transforms/InstCombine/pr12338.ll b/llvm/test/Transforms/InstCombine/pr12338.ll
new file mode 100644
index 00000000000..7e0bf59614c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr12338.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @entry() nounwind {
+entry:
+ br label %for.cond
+
+; CHECK: br label %for.cond
+for.cond:
+ %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
+ %phi3 = sub <1 x i32> zeroinitializer, %local
+ br label %cond.end
+
+cond.false:
+ br label %cond.end
+
+cond.end:
+ %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
+ br label %cond.end47
+
+cond.end47:
+ %sum = add <1 x i32> %cond, <i32 92>
+ %phi2 = sub <1 x i32> zeroinitializer, %sum
+ br label %for.cond
+}
diff --git a/llvm/test/Transforms/InstCombine/pr17827.ll b/llvm/test/Transforms/InstCombine/pr17827.ll
new file mode 100644
index 00000000000..e9312fceeda
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr17827.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; With left shift, the comparison should not be modified.
+define i1 @test_shift_and_cmp_not_changed1(i8 %p) {
+; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
+; CHECK-NEXT: [[SHLP:%.*]] = shl i8 %p, 5
+; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlp = shl i8 %p, 5
+ %andp = and i8 %shlp, -64
+ %cmp = icmp slt i8 %andp, 32
+ ret i1 %cmp
+}
+
+; With arithmetic right shift, the comparison should not be modified.
+define i1 @test_shift_and_cmp_not_changed2(i8 %p) {
+; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
+; CHECK-NEXT: [[SHLP:%.*]] = ashr i8 %p, 5
+; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlp = ashr i8 %p, 5
+ %andp = and i8 %shlp, -64
+ %cmp = icmp slt i8 %andp, 32
+ ret i1 %cmp
+}
+
+; This should simplify functionally to the left shift case.
+; The extra input parameter should be optimized away.
+define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) {
+; CHECK-LABEL: @test_shift_and_cmp_changed1(
+; CHECK-NEXT: [[ANDP:%.*]] = shl i8 %p, 5
+; CHECK-NEXT: [[SHL:%.*]] = and i8 [[ANDP]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SHL]], 32
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %andp = and i8 %p, 6
+ %andq = and i8 %q, 8
+ %or = or i8 %andq, %andp
+ %shl = shl i8 %or, 5
+ %ashr = ashr i8 %shl, 5
+ %cmp = icmp slt i8 %ashr, 1
+ ret i1 %cmp
+}
+
+define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) {
+; CHECK-LABEL: @test_shift_and_cmp_changed1_vec(
+; CHECK-NEXT: [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], <i8 5, i8 5>
+; CHECK-NEXT: [[SHL:%.*]] = and <2 x i8> [[ANDP]], <i8 -64, i8 -64>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SHL]], <i8 32, i8 32>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %andp = and <2 x i8> %p, <i8 6, i8 6>
+ %andq = and <2 x i8> %q, <i8 8, i8 8>
+ %or = or <2 x i8> %andq, %andp
+ %shl = shl <2 x i8> %or, <i8 5, i8 5>
+ %ashr = ashr <2 x i8> %shl, <i8 5, i8 5>
+ %cmp = icmp slt <2 x i8> %ashr, <i8 1, i8 1>
+ ret <2 x i1> %cmp
+}
+
+; Unsigned compare allows a transformation to compare against 0.
+define i1 @test_shift_and_cmp_changed2(i8 %p) {
+; CHECK-LABEL: @test_shift_and_cmp_changed2(
+; CHECK-NEXT: [[ANDP:%.*]] = and i8 %p, 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ANDP]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlp = shl i8 %p, 5
+ %andp = and i8 %shlp, -64
+ %cmp = icmp ult i8 %andp, 32
+ ret i1 %cmp
+}
+
+define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) {
+; CHECK-LABEL: @test_shift_and_cmp_changed2_vec(
+; CHECK-NEXT: [[ANDP:%.*]] = and <2 x i8> %p, <i8 6, i8 6>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[ANDP]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shlp = shl <2 x i8> %p, <i8 5, i8 5>
+ %andp = and <2 x i8> %shlp, <i8 -64, i8 -64>
+ %cmp = icmp ult <2 x i8> %andp, <i8 32, i8 32>
+ ret <2 x i1> %cmp
+}
+
+; nsw on the shift should not affect the comparison.
+define i1 @test_shift_and_cmp_changed3(i8 %p) {
+; CHECK-LABEL: @test_shift_and_cmp_changed3(
+; CHECK-NEXT: [[SHLP:%.*]] = shl nsw i8 %p, 5
+; CHECK-NEXT: [[ANDP:%.*]] = and i8 [[SHLP]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[ANDP]], 32
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlp = shl nsw i8 %p, 5
+ %andp = and i8 %shlp, -64
+ %cmp = icmp slt i8 %andp, 32
+ ret i1 %cmp
+}
+
+; Logical shift right allows returning true because the 'and' guarantees no bits are set.
+define i1 @test_shift_and_cmp_changed4(i8 %p) {
+; CHECK-LABEL: @test_shift_and_cmp_changed4(
+; CHECK-NEXT: ret i1 true
+;
+ %shlp = lshr i8 %p, 5
+ %andp = and i8 %shlp, -64
+ %cmp = icmp slt i8 %andp, 32
+ ret i1 %cmp
+}
+
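Aside (illustrative only, not part of the patch): the always-true result in the last test follows from simple value ranges; after an lshr by 5 an i8 holds at most 7, ANDing that with -64 (0xC0) always yields 0, and 0 is signed-less-than 32. A brute-force C check of that reasoning:

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* Exhaustively confirm what test_shift_and_cmp_changed4 expects. */
    for (int p = 0; p < 256; ++p) {
        uint8_t shifted = (uint8_t)p >> 5;           /* in [0, 7]           */
        int8_t  masked  = (int8_t)(shifted & 0xC0);  /* always 0            */
        assert(masked < 32);                         /* so icmp slt is true */
    }
    return 0;
}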
diff --git a/llvm/test/Transforms/InstCombine/pr19420.ll b/llvm/test/Transforms/InstCombine/pr19420.ll
new file mode 100644
index 00000000000..015f35eaaa5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr19420.ll
@@ -0,0 +1,89 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <4 x i32> @test_FoldShiftByConstant_CreateSHL(<4 x i32> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL(
+; CHECK-NEXT: [[VSHL_N:%.*]] = mul <4 x i32> %in, <i32 0, i32 -32, i32 0, i32 -32>
+; CHECK-NEXT: ret <4 x i32> [[VSHL_N]]
+;
+ %mul.i = mul <4 x i32> %in, <i32 0, i32 -1, i32 0, i32 -1>
+ %vshl_n = shl <4 x i32> %mul.i, <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %vshl_n
+}
+
+define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateSHL2(
+; CHECK-NEXT: [[VSHL_N:%.*]] = mul <8 x i16> %in, <i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32, i16 0, i16 -32>
+; CHECK-NEXT: ret <8 x i16> [[VSHL_N]]
+;
+ %mul.i = mul <8 x i16> %in, <i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1>
+ %vshl_n = shl <8 x i16> %mul.i, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ ret <8 x i16> %vshl_n
+}
+
+define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
+; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd(
+; CHECK-NEXT: [[VSRA_N2:%.*]] = mul <16 x i8> %in0, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+; CHECK-NEXT: [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
+; CHECK-NEXT: ret <16 x i8> [[VSHL_N]]
+;
+ %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ %tmp = add <16 x i8> %in0, %vsra_n
+ %vshl_n = shl <16 x i8> %tmp, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ ret <16 x i8> %vshl_n
+}
+
+define i32 @bar(i32 %x, i32 %y) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[B1:%.*]] = shl i32 %y, 4
+; CHECK-NEXT: [[A2:%.*]] = add i32 [[B1]], %x
+; CHECK-NEXT: [[C:%.*]] = and i32 [[A2]], -16
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %a = lshr i32 %x, 4
+ %b = add i32 %a, %y
+ %c = shl i32 %b, 4
+ ret i32 %c
+}
+
+define <2 x i32> @bar_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @bar_v2i32(
+; CHECK-NEXT: [[B1:%.*]] = shl <2 x i32> %y, <i32 5, i32 5>
+; CHECK-NEXT: [[A2:%.*]] = add <2 x i32> [[B1]], %x
+; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A2]], <i32 -32, i32 -32>
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %a = lshr <2 x i32> %x, <i32 5, i32 5>
+ %b = add <2 x i32> %a, %y
+ %c = shl <2 x i32> %b, <i32 5, i32 5>
+ ret <2 x i32> %c
+}
+
+define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[C1:%.*]] = shl i32 %y, 4
+; CHECK-NEXT: [[X_MASK:%.*]] = and i32 %x, 128
+; CHECK-NEXT: [[D:%.*]] = add i32 [[X_MASK]], [[C1]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %a = lshr i32 %x, 4
+ %b = and i32 %a, 8
+ %c = add i32 %b, %y
+ %d = shl i32 %c, 4
+ ret i32 %d
+}
+
+define <2 x i32> @foo_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @foo_v2i32(
+; CHECK-NEXT: [[A:%.*]] = lshr <2 x i32> %x, <i32 4, i32 4>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], <i32 8, i32 8>
+; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], %y
+; CHECK-NEXT: [[D:%.*]] = shl <2 x i32> [[C]], <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+ %a = lshr <2 x i32> %x, <i32 4, i32 4>
+ %b = and <2 x i32> %a, <i32 8, i32 8>
+ %c = add <2 x i32> %b, %y
+ %d = shl <2 x i32> %c, <i32 4, i32 4>
+ ret <2 x i32> %d
+}
+
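Aside (illustrative only, not part of the patch): the @bar fold above hoists the add across the lshr/shl pair; because the shift pair only clears the low four bits, it is equivalent to adding the pre-shifted operand and masking once at the end. A brute-force C check of the identity:

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* ((x >> 4) + y) << 4 keeps the low 4 bits clear, so it matches
     * (x + (y << 4)) with the low 4 bits masked off (the @bar test). */
    for (uint32_t x = 0; x < (1u << 16); x += 97)
        for (uint32_t y = 0; y < (1u << 16); y += 101)
            assert((((x >> 4) + y) << 4) == ((x + (y << 4)) & ~15u));
    return 0;
}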
diff --git a/llvm/test/Transforms/InstCombine/pr20079.ll b/llvm/test/Transforms/InstCombine/pr20079.ll
new file mode 100644
index 00000000000..ce9c4deb06a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr20079.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+@b = internal global [1 x i32] zeroinitializer, align 4
+@c = internal global i32 0, align 4
+
+; CHECK-LABEL: @fn1
+; CHECK-NEXT: ret i32 0
+define i32 @fn1(i32 %a) {
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/pr20678.ll b/llvm/test/Transforms/InstCombine/pr20678.ll
new file mode 100644
index 00000000000..4b5fac79449
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr20678.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @test1() {
+entry:
+ ret i1 icmp ne (i16 bitcast (<16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> to i16), i16 0)
+}
+; CHECK-LABEL: define i1 @test1(
+; CHECK: ret i1 true
diff --git a/llvm/test/Transforms/InstCombine/pr21199.ll b/llvm/test/Transforms/InstCombine/pr21199.ll
new file mode 100644
index 00000000000..e6599fb640d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr21199.ll
@@ -0,0 +1,25 @@
+; do not replace a 'select' with 'or' in a 'select - cmp - br' sequence
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f(i32)
+
+define void @test(i32 %len) {
+entry:
+ %cmp = icmp ult i32 %len, 8
+ %cond = select i1 %cmp, i32 %len, i32 8
+ %cmp11 = icmp ult i32 0, %cond
+ br i1 %cmp11, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void @f(i32 %cond)
+ %inc = add i32 %i.02, 1
+ %cmp1 = icmp ult i32 %inc, %cond
+ br i1 %cmp1, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+; CHECK: select
+}
diff --git a/llvm/test/Transforms/InstCombine/pr21210.ll b/llvm/test/Transforms/InstCombine/pr21210.ll
new file mode 100644
index 00000000000..ac229a89ca5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr21210.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -instcombine -S | FileCheck %s
+; Checks that the select-icmp optimization is safe in two cases
+declare void @foo(i32)
+declare i32 @bar(i32)
+
+; don't replace 'cond' by 'len' in the home block ('bb') that
+; contains the select
+define void @test1(i32 %len) {
+entry:
+ br label %bb
+
+bb:
+ %cmp = icmp ult i32 %len, 8
+ %cond = select i1 %cmp, i32 %len, i32 8
+ call void @foo(i32 %cond)
+ %cmp11 = icmp eq i32 %cond, 8
+ br i1 %cmp11, label %for.end, label %bb
+
+for.end:
+ ret void
+; CHECK: select
+; CHECK: icmp eq i32 %cond, 8
+}
+
+; don't replace 'cond' by 'len' in a block ('b1') that dominates all uses
+; of the select outside the home block ('bb'), but can be reached from the home
+; block on another path ('bb -> b0 -> b1')
+define void @test2(i32 %len) {
+entry:
+ %0 = call i32 @bar(i32 %len);
+ %cmp = icmp ult i32 %len, 4
+ br i1 %cmp, label %bb, label %b1
+bb:
+ %cmp2 = icmp ult i32 %0, 2
+ %cond = select i1 %cmp2, i32 %len, i32 8
+ %cmp3 = icmp eq i32 %cond, 8
+ br i1 %cmp3, label %b0, label %b1
+
+b0:
+ call void @foo(i32 %len)
+ br label %b1
+
+b1:
+; CHECK: phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+ %1 = phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+ br label %ret
+
+ret:
+ call void @foo(i32 %1)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr21651.ll b/llvm/test/Transforms/InstCombine/pr21651.ll
new file mode 100644
index 00000000000..bc8fe617726
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr21651.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+define void @PR21651() {
+; CHECK-LABEL: @PR21651(
+; CHECK-NEXT: switch i1 false, label %out [
+; CHECK-NEXT: i1 false, label %out
+; CHECK-NEXT: i1 true, label %out
+; CHECK-NEXT: ]
+; CHECK: out:
+; CHECK-NEXT: ret void
+;
+ switch i2 0, label %out [
+ i2 0, label %out
+ i2 1, label %out
+ ]
+
+out:
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/pr21891.ll b/llvm/test/Transforms/InstCombine/pr21891.ll
new file mode 100644
index 00000000000..8194976b623
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr21891.ll
@@ -0,0 +1,18 @@
+; RUN: opt %s -instcombine
+
+define i32 @f(i32 %theNumber) {
+entry:
+ %cmp = icmp sgt i32 %theNumber, -1
+ call void @llvm.assume(i1 %cmp)
+ br i1 true, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %shl = shl nuw i32 %theNumber, 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %phi = phi i32 [ %shl, %if.then ], [ undef, %entry ]
+ ret i32 %phi
+}
+
+declare void @llvm.assume(i1)
diff --git a/llvm/test/Transforms/InstCombine/pr23751.ll b/llvm/test/Transforms/InstCombine/pr23751.ll
new file mode 100644
index 00000000000..d7840be2f83
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr23751.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@d = common global i32 0, align 4
+
+define i1 @f(i8 zeroext %p) #1 {
+; CHECK-NOT: ret i1 false
+ %1 = zext i8 %p to i32
+ %2 = load i32, i32* @d, align 4
+ %3 = or i32 %2, -2
+ %4 = add nsw i32 %3, %1
+ %5 = icmp ugt i32 %1, %4
+ ret i1 %5
+}
diff --git a/llvm/test/Transforms/InstCombine/pr23809.ll b/llvm/test/Transforms/InstCombine/pr23809.ll
new file mode 100644
index 00000000000..06c7ce20ba8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr23809.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should preserve the call to @llvm.assume.
+define i32 @icmp(i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp(
+ %sum = add i32 %a, %b
+ %1 = icmp sge i32 %sum, 0
+ call void @llvm.assume(i1 %1)
+; CHECK: call void @llvm.assume
+ ret i32 %sum
+}
+
+define float @fcmp(float %a, float %b) {
+; CHECK-LABEL: @fcmp(
+ %sum = fadd float %a, %b
+ %1 = fcmp oge float %sum, 0.0
+ call void @llvm.assume(i1 %1)
+; CHECK: call void @llvm.assume
+ ret float %sum
+}
+
+declare void @llvm.assume(i1)
diff --git a/llvm/test/Transforms/InstCombine/pr24354.ll b/llvm/test/Transforms/InstCombine/pr24354.ll
new file mode 100644
index 00000000000..3b36fd1b74e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr24354.ll
@@ -0,0 +1,33 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This used to crash opt
+
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@a = common global i16 0, align 2
+@d = common global i32 0, align 4
+
+define void @fn3() {
+; CHECK: @fn3
+bb:
+ %tmp = load i32, i32* @c, align 4
+ %tmp1 = icmp eq i32 %tmp, 0
+ br i1 %tmp1, label %bb2, label %bb6
+
+bb2: ; preds = %bb
+ %tmp3 = load i32, i32* @b, align 4
+ %tmp.i = add nsw i32 255, %tmp3
+ %tmp5 = icmp ugt i32 %tmp.i, 254
+ br label %bb6
+
+bb6: ; preds = %bb, %bb2
+ %tmp7 = phi i1 [ true, %bb ], [ %tmp5, %bb2 ]
+ %tmp8 = zext i1 %tmp7 to i32
+ %tmp10 = icmp eq i32 %tmp8, 0
+ %tmp12 = load i16, i16* @a, align 2
+ %tmp14 = icmp ne i16 %tmp12, 0
+ %tmp16 = select i1 %tmp10, i1 false, i1 %tmp14
+ %tmp17 = zext i1 %tmp16 to i32
+ store i32 %tmp17, i32* @d, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr24605.ll b/llvm/test/Transforms/InstCombine/pr24605.ll
new file mode 100644
index 00000000000..4b7b36137e6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr24605.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @f(i8* %a, i8 %b) {
+; CHECK-LABEL: @f(
+entry:
+ %or = or i8 %b, -117
+ %sub = add i8 %or, -1
+ store i8 %sub, i8* %a, align 1
+ %cmp = icmp ugt i8 %or, %sub
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
diff --git a/llvm/test/Transforms/InstCombine/pr25342.ll b/llvm/test/Transforms/InstCombine/pr25342.ll
new file mode 100644
index 00000000000..b9cc3755a20
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr25342.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%"struct.std::complex" = type { { float, float } }
+@dd = external global %"struct.std::complex", align 4
+@dd2 = external global %"struct.std::complex", align 4
+
+define void @_Z3fooi(i32 signext %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %5, %for.body ]
+ %ldd.sroa.6.0 = phi i32 [ 0, %entry ], [ %7, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
+ %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
+ %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
+ %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
+ %mul.i = fmul float %0, %2
+ %mul4.i = fmul float %1, %3
+ %sub.i = fsub float %mul.i, %mul4.i
+ %mul5.i = fmul float %1, %2
+ %mul6.i = fmul float %0, %3
+ %add.i4 = fadd float %mul5.i, %mul6.i
+ %4 = bitcast i32 %ldd.sroa.0.0 to float
+ %add.i = fadd float %sub.i, %4
+ %5 = bitcast float %add.i to i32
+ %6 = bitcast i32 %ldd.sroa.6.0 to float
+ %add4.i = fadd float %add.i4, %6
+ %7 = bitcast float %add4.i to i32
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
+ store i32 %ldd.sroa.6.0, i32* bitcast (float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1) to i32*), align 4
+ ret void
+
+; CHECK: phi float
+; CHECK: store float
+; CHECK-NOT: bitcast
+}
+
+
+define void @multi_phi(i32 signext %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %9, %odd.bb ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %odd.bb ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
+ %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
+ %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
+ %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
+ %mul.i = fmul float %0, %2
+ %mul4.i = fmul float %1, %3
+ %sub.i = fsub float %mul.i, %mul4.i
+ %4 = bitcast i32 %ldd.sroa.0.0 to float
+ %add.i = fadd float %sub.i, %4
+ %5 = bitcast float %add.i to i32
+ %inc = add nsw i32 %i.0, 1
+ %bit0 = and i32 %inc, 1
+ %even = icmp slt i32 %bit0, 1
+ br i1 %even, label %even.bb, label %odd.bb
+
+even.bb:
+ %6 = bitcast i32 %5 to float
+ %7 = fadd float %sub.i, %6
+ %8 = bitcast float %7 to i32
+ br label %odd.bb
+
+odd.bb:
+ %9 = phi i32 [ %5, %for.body ], [ %8, %even.bb ]
+ br label %for.cond
+
+for.end:
+ store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
+ ret void
+
+; CHECK-LABEL: @multi_phi(
+; CHECK: phi float
+; CHECK: store float
+; CHECK-NOT: bitcast
+}
diff --git a/llvm/test/Transforms/InstCombine/pr25745.ll b/llvm/test/Transforms/InstCombine/pr25745.ll
new file mode 100644
index 00000000000..3bf9efc92b9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr25745.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Checking for a crash
+
+declare void @use.i1(i1 %val)
+declare void @use.i64(i64 %val)
+
+define i64 @f(i32 %x) {
+; CHECK-LABEL: @f(
+ entry:
+ %x.wide = sext i32 %x to i64
+ %minus.x = sub i32 0, %x
+ %minus.x.wide = sext i32 %minus.x to i64
+ %c = icmp slt i32 %x, 0
+ %val = select i1 %c, i64 %x.wide, i64 %minus.x.wide
+ call void @use.i1(i1 %c)
+ call void @use.i64(i64 %x.wide)
+ ret i64 %val
+; CHECK: ret i64 %val
+}
diff --git a/llvm/test/Transforms/InstCombine/pr2645-0.ll b/llvm/test/Transforms/InstCombine/pr2645-0.ll
new file mode 100644
index 00000000000..21bfa64a860
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr2645-0.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -instcombine -S | grep "insertelement <4 x float> undef"
+
+; Instcombine should be able to prove that none of the
+; insertelement's first operand's elements are needed.
+
+define internal void @""(i8*) {
+; <label>:1
+ bitcast i8* %0 to i32* ; <i32*>:2 [#uses=1]
+ load i32, i32* %2, align 1 ; <i32>:3 [#uses=1]
+ getelementptr i8, i8* %0, i32 4 ; <i8*>:4 [#uses=1]
+ bitcast i8* %4 to i32* ; <i32*>:5 [#uses=1]
+ load i32, i32* %5, align 1 ; <i32>:6 [#uses=1]
+ br label %7
+
+; <label>:7 ; preds = %9, %1
+ %.01 = phi <4 x float> [ undef, %1 ], [ %12, %9 ] ; <<4 x float>> [#uses=1]
+ %.0 = phi i32 [ %3, %1 ], [ %15, %9 ] ; <i32> [#uses=3]
+ icmp slt i32 %.0, %6 ; <i1>:8 [#uses=1]
+ br i1 %8, label %9, label %16
+
+; <label>:9 ; preds = %7
+ sitofp i32 %.0 to float ; <float>:10 [#uses=1]
+ insertelement <4 x float> %.01, float %10, i32 0 ; <<4 x float>>:11 [#uses=1]
+ shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:12 [#uses=2]
+ getelementptr i8, i8* %0, i32 48 ; <i8*>:13 [#uses=1]
+ bitcast i8* %13 to <4 x float>* ; <<4 x float>*>:14 [#uses=1]
+ store <4 x float> %12, <4 x float>* %14, align 16
+ add i32 %.0, 2 ; <i32>:15 [#uses=1]
+ br label %7
+
+; <label>:16 ; preds = %7
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr26992.ll b/llvm/test/Transforms/InstCombine/pr26992.ll
new file mode 100644
index 00000000000..e5bfb5c0e40
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr26992.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target triple = "x86_64-pc-windows-msvc"
+
+define i1 @test1(i8* %p) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = getelementptr i8, i8* %p, i64 1
+ invoke void @may_throw()
+ to label %invoke.cont unwind label %catch.dispatch
+
+invoke.cont:
+ %b = getelementptr inbounds i8, i8* %a, i64 1
+ invoke void @may_throw()
+ to label %exit unwind label %catch.dispatch
+
+catch.dispatch:
+ %c = phi i8* [ %b, %invoke.cont ], [ %a, %entry ]
+ %tmp1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %tmp2 = catchpad within %tmp1 [i8* null, i32 64, i8* null]
+ catchret from %tmp2 to label %exit
+
+exit:
+ %d = phi i8* [ %a, %invoke.cont ], [ %c, %catch ]
+ %cmp = icmp eq i8* %d, %a
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: define i1 @test1(
+; CHECK: %[[gep_a:.*]] = getelementptr i8, i8* %p, i64 1
+; CHECK: %[[gep_b:.*]] = getelementptr inbounds i8, i8* %p, i64 2
+; CHECK: phi i8* [ %[[gep_b]], {{.*}} ], [ %[[gep_a]], {{.*}} ]
+; CHECK: %tmp1 = catchswitch within none [label %catch] unwind to caller
+
+declare void @may_throw()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/InstCombine/pr26993.ll b/llvm/test/Transforms/InstCombine/pr26993.ll
new file mode 100644
index 00000000000..14b33d10cc3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr26993.ll
@@ -0,0 +1,24 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define double @test1() {
+ %sin = call double @__sinpi(double 1.0)
+ ret double %sin
+}
+
+; CHECK-LABEL: define double @test1(
+; CHECK: %[[sin:.*]] = call double @__sinpi(double 1.000000e+00)
+; CHECK-NEXT: ret double %[[sin]]
+
+define double @test2() {
+ %cos = call double @__cospi(double 1.0)
+ ret double %cos
+}
+
+; CHECK-LABEL: define double @test2(
+; CHECK: %[[cos:.*]] = call double @__cospi(double 1.000000e+00)
+; CHECK-NEXT: ret double %[[cos]]
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0
+
+attributes #0 = { readnone nounwind }
diff --git a/llvm/test/Transforms/InstCombine/pr27236.ll b/llvm/test/Transforms/InstCombine/pr27236.ll
new file mode 100644
index 00000000000..f55ee0bffd8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr27236.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define float @test1(i32 %scale) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SCALE:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[SCALE]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = icmp sgt i32 1, %scale
+ %2 = select i1 %1, i32 1, i32 %scale
+ %3 = sitofp i32 %2 to float
+ %4 = icmp sgt i32 %2, 0
+ %sel = select i1 %4, float %3, float 0.000000e+00
+ ret float %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/pr27332.ll b/llvm/test/Transforms/InstCombine/pr27332.ll
new file mode 100644
index 00000000000..87e440eed1c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr27332.ll
@@ -0,0 +1,23 @@
+; RUN: opt -instcombine -S -o - < %s | FileCheck %s
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+
+define <4 x i1> @test1(<4 x float> %V) {
+entry:
+ %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %V)
+ %cmp = fcmp olt <4 x float> %abs, zeroinitializer
+ ret <4 x i1> %cmp
+}
+; CHECK-LABEL: define <4 x i1> @test1(
+; CHECK: ret <4 x i1> zeroinitializer
+
+declare float @fabsf()
+
+define i1 @test2() {
+ %call = call float @fabsf()
+ %cmp = fcmp olt float %call, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: define i1 @test2(
+; CHECK: %[[call:.*]] = call float @fabsf()
+; CHECK: %[[cmp:.*]] = fcmp olt float %[[call]], 0.000000e+00
+; CHECK: ret i1 %[[cmp]]
diff --git a/llvm/test/Transforms/InstCombine/pr27343.ll b/llvm/test/Transforms/InstCombine/pr27343.ll
new file mode 100644
index 00000000000..5a9267b16af
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr27343.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+define i32 @__isnan(float %x) alwaysinline nounwind optsize {
+; CHECK-LABEL: @__isnan(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast float [[X:%.*]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[DOTCAST]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SHL]], -16777216
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %x.addr = alloca float, align 4
+ store float %x, float* %x.addr, align 4
+ %0 = load float, float* %x.addr, align 4
+ %1 = bitcast float %0 to i32
+ %shl = shl i32 %1, 1
+ %cmp = icmp ugt i32 %shl, -16777216
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i1 @icmp_shl7(i32 %x) {
+; CHECK-LABEL: @icmp_shl7(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 4608
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 %x, 7
+ %cmp = icmp slt i32 %shl, 4608
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/pr27703.ll b/llvm/test/Transforms/InstCombine/pr27703.ll
new file mode 100644
index 00000000000..2981afe171e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr27703.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @mem() {
+bb:
+ br label %bb6
+
+bb6:
+ %.0 = phi i8** [ undef, %bb ], [ %t2, %bb6 ]
+ %tmp = load i8*, i8** %.0, align 8
+ %bc = bitcast i8* %tmp to i8**
+ %t1 = load i8*, i8** %bc, align 8
+ %t2 = bitcast i8* %t1 to i8**
+ br label %bb6
+
+bb206:
+ ret void
+; CHECK: phi
+; CHECK: bitcast
+; CHECK: load
+}
diff --git a/llvm/test/Transforms/InstCombine/pr27996.ll b/llvm/test/Transforms/InstCombine/pr27996.ll
new file mode 100644
index 00000000000..3fefe6e7839
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr27996.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+
+@i = constant i32 1, align 4
+@f = constant float 0x3FF19999A0000000, align 4
+@cmp = common global i32 0, align 4
+@resf = common global float* null, align 8
+@resi = common global i32* null, align 8
+
+define i32 @foo() {
+entry:
+ br label %while.cond
+
+while.cond:
+ %res.0 = phi i32* [ null, %entry ], [ @i, %if.then ], [ bitcast (float* @f to i32*), %if.else ]
+ %0 = load i32, i32* @cmp, align 4
+ %shr = ashr i32 %0, 1
+ store i32 %shr, i32* @cmp, align 4
+ %tobool = icmp ne i32 %shr, 0
+ br i1 %tobool, label %while.body, label %while.end
+
+while.body:
+ %and = and i32 %shr, 1
+ %tobool1 = icmp ne i32 %and, 0
+ br i1 %tobool1, label %if.then, label %if.else
+
+if.then:
+ br label %while.cond
+
+if.else:
+ br label %while.cond
+
+while.end:
+ %1 = bitcast i32* %res.0 to float*
+ store float* %1, float** @resf, align 8
+ store i32* %res.0, i32** @resi, align 8
+ ret i32 0
+
+; CHECK-NOT: bitcast i32
+}
+
diff --git a/llvm/test/Transforms/InstCombine/pr28143.ll b/llvm/test/Transforms/InstCombine/pr28143.ll
new file mode 100644
index 00000000000..9ef273e5ed4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr28143.ll
@@ -0,0 +1,12 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define void @test1() {
+entry:
+ call void @tan()
+ ret void
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: call void @tan()
+; CHECK-NEXT: ret void
+
+declare void @tan()
diff --git a/llvm/test/Transforms/InstCombine/pr28725.ll b/llvm/test/Transforms/InstCombine/pr28725.ll
new file mode 100644
index 00000000000..ff9440d605a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr28725.ll
@@ -0,0 +1,11 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+%S = type { i16, i32 }
+
+define <2 x i16> @test1() {
+entry:
+ %b = insertelement <2 x i16> <i16 undef, i16 0>, i16 extractvalue (%S select (i1 icmp eq (i16 extractelement (<2 x i16> bitcast (<1 x i32> <i32 1> to <2 x i16>), i32 0), i16 0), %S zeroinitializer, %S { i16 0, i32 1 }), 0), i32 0
+ ret <2 x i16> %b
+}
+
+; CHECK-LABEL: @test1(
+; CHECK: ret <2 x i16> zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/pr2996.ll b/llvm/test/Transforms/InstCombine/pr2996.ll
new file mode 100644
index 00000000000..f5e1df4dd04
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr2996.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine
+; PR2996
+
+define void @func_53(i16 signext %p_56) nounwind {
+entry:
+ %0 = icmp sgt i16 %p_56, -1 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, i32 -1, i32 0 ; <i32> [#uses=1]
+ %1 = call i32 (...) @func_4(i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @func_4(...)
diff --git a/llvm/test/Transforms/InstCombine/pr30929.ll b/llvm/test/Transforms/InstCombine/pr30929.ll
new file mode 100644
index 00000000000..2d19775f312
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr30929.ll
@@ -0,0 +1,11 @@
+; We need this pipeline because, to trigger dominator info verification,
+; we have to compute the dominator tree before libcalls-shrinkwrap and
+; have a pass that requires the dominator tree afterwards.
+; RUN: opt -domtree -libcalls-shrinkwrap -instcombine -verify-dom-info %s
+
+define void @main() {
+ %_tmp31 = call float @acosf(float 2.000000e+00)
+ ret void
+}
+
+declare float @acosf(float)
diff --git a/llvm/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll b/llvm/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll
new file mode 100644
index 00000000000..f7874b9ee23
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr31990_wrong_memcpy.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; Regression test of PR31990. A memcpy of one byte, copying 0xff, was
+; replaced with a single store of an i4 0xf.
+
+@g = constant i8 -1
+
+define void @foo() {
+entry:
+ %0 = alloca i8
+ %1 = bitcast i8* %0 to i4*
+ call void @bar(i4* %1)
+ %2 = bitcast i4* %1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* @g, i32 1, i1 false)
+ call void @gaz(i8* %2)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)
+declare void @bar(i4*)
+declare void @gaz(i8*)
+
+; The memcpy should be simplified to a single store of an i8, not an i4
+; CHECK: store i8 -1
+; CHECK-NOT: store i4 -1
diff --git a/llvm/test/Transforms/InstCombine/pr32686.ll b/llvm/test/Transforms/InstCombine/pr32686.ll
new file mode 100644
index 00000000000..b2d2aff2fde
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr32686.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+@a = external global i8
+@b = external global i32
+
+define void @tinkywinky() {
+; CHECK-LABEL: @tinkywinky(
+; CHECK-NEXT: [[PATATINO:%.*]] = load i8, i8* @a, align 1
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8 [[PATATINO]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i32
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[TMP1]], or (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2)
+; CHECK-NEXT: store i32 [[OR1]], i32* @b, align 4
+; CHECK-NEXT: ret void
+;
+ %patatino = load i8, i8* @a
+ %tobool = icmp ne i8 %patatino, 0
+ %lnot = xor i1 %tobool, true
+ %lnot.ext = zext i1 %lnot to i32
+ %or = or i32 xor (i32 zext (i1 icmp ne (i32* bitcast (i8* @a to i32*), i32* @b) to i32), i32 2), %lnot.ext
+ store i32 %or, i32* @b, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr33453.ll b/llvm/test/Transforms/InstCombine/pr33453.ll
new file mode 100644
index 00000000000..dee4c5bf566
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr33453.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+@g1 = external global i16
+@g2 = external global i16
+
+define float @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT: ret float fmul (float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float), float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float))
+;
+ %call = call float @fabsf(float fmul (float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float), float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float)))
+ ret float %call
+}
+
+declare float @fabsf(float)
diff --git a/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
new file mode 100644
index 00000000000..e5dd019b9b5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr33689_same_bitwidth.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; All the "useless" instructions should be removed and we shouldn't crash.
+
+target datalayout = "p:16:16"
+
+%i64_t = type i64
+
+@a = external global i16
+@b = external global i16*
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[TMP12:%.*]] = alloca [2 x i32], align 8
+; CHECK-NEXT: [[TMP12_SUB:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP12]], i16 0, i16 0
+; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint [2 x i32]* [[TMP12]] to i16
+; CHECK-NEXT: store i16 [[TMP8]], i16* @a, align 2
+; CHECK-NEXT: unreachable
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP9:%.*]] = load i16*, i16** @b, align 2
+; CHECK-NEXT: store i16 0, i16* [[TMP9]], align 2
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP12_SUB]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -1
+; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP12_SUB]], align 8
+; CHECK-NEXT: ret void
+;
+bb0:
+ %tmp1 = alloca %i64_t
+ %tmp2 = bitcast %i64_t* %tmp1 to i32*
+ %useless3 = bitcast %i64_t* %tmp1 to i16*
+ %useless4 = getelementptr inbounds i16, i16* %useless3, i16 undef
+ %useless5 = bitcast i16* %useless4 to i32*
+ br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb0
+ %useless6 = insertvalue [1 x i32*] undef, i32* %tmp2, 0
+ %useless7 = insertvalue [1 x i32*] %useless6, i32* null, 0
+ %tmp8 = ptrtoint i32* %tmp2 to i16
+ store i16 %tmp8, i16* @a
+ unreachable
+
+bb2: ; preds = %bb0
+ %tmp9 = load i16*, i16** @b
+ store i16 0, i16* %tmp9
+ %tmp10 = load i32, i32* %tmp2
+ %tmp11 = sub i32 %tmp10, 1
+ store i32 %tmp11, i32* %tmp2
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr34349.ll b/llvm/test/Transforms/InstCombine/pr34349.ll
new file mode 100644
index 00000000000..b88f77a4c86
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr34349.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -instcombine -S %s | FileCheck %s
+
+define i8 @fast_div_201(i8 %p) {
+; CHECK-LABEL: @fast_div_201(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V3:%.*]] = zext i8 [[P:%.*]] to i16
+; CHECK-NEXT: [[V4:%.*]] = mul nuw nsw i16 [[V3]], 71
+; CHECK-NEXT: [[V5:%.*]] = lshr i16 [[V4]], 8
+; CHECK-NEXT: [[V6:%.*]] = trunc i16 [[V5]] to i8
+; CHECK-NEXT: [[V7:%.*]] = sub i8 [[P]], [[V6]]
+; CHECK-NEXT: [[V8:%.*]] = lshr i8 [[V7]], 1
+; CHECK-NEXT: [[V13:%.*]] = add nuw i8 [[V8]], [[V6]]
+; CHECK-NEXT: [[V14:%.*]] = lshr i8 [[V13]], 7
+; CHECK-NEXT: ret i8 [[V14]]
+;
+entry:
+ %v3 = zext i8 %p to i16
+ %v4 = mul i16 %v3, 71
+ %v5 = lshr i16 %v4, 8
+ %v6 = trunc i16 %v5 to i8
+ %v7 = sub i8 %p, %v6
+ %v8 = lshr i8 %v7, 1
+ %v13 = add i8 %v6, %v8
+ %v14 = lshr i8 %v13, 7
+ ret i8 %v14
+}
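+
+; The sequence above is the classic multiply-by-reciprocal expansion of an
+; unsigned divide by 201: q = (p * 71) >> 8 approximates the quotient, and
+; (((p - q) >> 1) + q) >> 7 completes the rounding. For example, p = 255 gives
+; q = 70 and (((255 - 70) >> 1) + 70) >> 7 = 1, which matches 255 / 201.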
diff --git a/llvm/test/Transforms/InstCombine/pr34627.ll b/llvm/test/Transforms/InstCombine/pr34627.ll
new file mode 100644
index 00000000000..8935ecf755c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr34627.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s |FileCheck %s
+
+define <2 x i16> @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT: ret <2 x i16> zeroinitializer
+;
+ %tmp2 = getelementptr inbounds [1 x i16], [1 x i16]* null, i16 0, <2 x i16> undef
+ %tmp3 = ptrtoint <2 x i16*> %tmp2 to <2 x i16>
+ ret <2 x i16> %tmp3
+}
diff --git a/llvm/test/Transforms/InstCombine/pr35515.ll b/llvm/test/Transforms/InstCombine/pr35515.ll
new file mode 100644
index 00000000000..1ad9b2fccd9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr35515.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@g_40 = external global i8, align 2
+@g_461 = external global [6 x i8], align 2
+@g_49 = external local_unnamed_addr global { i8, i8, i8, i8, i8 }, align 2
+
+; CHECK-LABEL: @func_24(
+define fastcc void @func_24() {
+entry:
+ %bf.load81 = load i40, i40* bitcast ({ i8, i8, i8, i8, i8 }* @g_49 to i40*), align 2
+ %bf.clear = and i40 %bf.load81, -274869518337
+ %bf.set = or i40 %bf.clear, shl (i40 zext (i1 icmp sgt (i32 zext (i1 icmp eq (i8* getelementptr inbounds ([6 x i8], [6 x i8]* @g_461, i64 0, i64 2), i8* @g_40) to i32), i32 0) to i40), i40 23)
+ %tmp = lshr i40 %bf.set, 23
+ %tmp1 = trunc i40 %tmp to i32
+ %tmp2 = and i32 1, %tmp1
+ %tmp3 = shl nuw nsw i32 %tmp2, 23
+ %bf.shl154 = zext i32 %tmp3 to i40
+ %bf.set156 = or i40 %bf.clear, %bf.shl154
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/pr36362.ll b/llvm/test/Transforms/InstCombine/pr36362.ll
new file mode 100644
index 00000000000..412691543a1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr36362.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -instcombine -S %s | FileCheck %s
+
+; We shouldn't remove the select before the srem
+define i32 @foo(i1 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 -1
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[C:%.*]], [[SEL1]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[A]], i32 [[REM]], i32 0
+; CHECK-NEXT: ret i32 [[SEL2]]
+;
+ %sel1 = select i1 %a, i32 %b, i32 -1
+ %rem = srem i32 %c, %sel1
+ %sel2 = select i1 %a, i32 %rem, i32 0
+ ret i32 %sel2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/pr38677.ll b/llvm/test/Transforms/InstCombine/pr38677.ll
new file mode 100644
index 00000000000..e5c6178105d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr38677.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -instcombine -S %s | FileCheck %s
+
+@A = extern_weak global i32, align 4
+@B = extern_weak global i32, align 4
+
+define i32 @foo(i1 %which) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 true, label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[USE2:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ select (i1 icmp eq (i32* @A, i32* @B), i32 2, i32 1), [[DELAY]] ]
+; CHECK-NEXT: [[B7:%.*]] = mul i32 [[USE2]], 2147483647
+; CHECK-NEXT: [[C3:%.*]] = icmp eq i32 [[B7]], 0
+; CHECK-NEXT: store i1 [[C3]], i1* undef, align 1
+; CHECK-NEXT: ret i32 [[USE2]]
+;
+entry:
+ br i1 true, label %final, label %delay
+
+delay: ; preds = %entry
+ br label %final
+
+final: ; preds = %delay, %entry
+ %use2 = phi i1 [ false, %entry ], [ icmp eq (i32* @A, i32* @B), %delay ]
+ %value = select i1 %use2, i32 2, i32 1
+ %B7 = mul i32 %value, 2147483647
+ %C3 = icmp ule i32 %B7, 0
+ store i1 %C3, i1* undef
+ ret i32 %value
+}
diff --git a/llvm/test/Transforms/InstCombine/pr38897.ll b/llvm/test/Transforms/InstCombine/pr38897.ll
new file mode 100644
index 00000000000..0b10f3510e5
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr38897.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+define i32 @sharpening(i32 %b340, i1 %c, i1 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+; CHECK-LABEL: @sharpening(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SMAX58:%.*]] = select i1 [[C:%.*]], i32 [[E:%.*]], i32 [[F:%.*]]
+; CHECK-NEXT: [[SMAX59:%.*]] = select i1 [[D:%.*]], i32 [[G:%.*]], i32 [[H:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX59]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], -1
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[SMAX58]], [[TMP12]]
+; CHECK-NEXT: [[SMAX61:%.*]] = select i1 [[TMP13]], i32 [[SMAX58]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[SMAX61]], -1
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+entry:
+ %smax58 = select i1 %c, i32 %e, i32 %f
+ %smax59 = select i1 %d, i32 %g, i32 %h
+ %tmp10 = sub i32 -2, %smax59
+ %tmp11 = icmp sgt i32 %tmp10, 0
+ %smax60 = select i1 %tmp11, i32 %tmp10, i32 0
+ %tmp12 = xor i32 %smax60, -1
+ %tmp13 = icmp sgt i32 %smax58, %tmp12
+ %smax61 = select i1 %tmp13, i32 %smax58, i32 %tmp12
+ %tmp14 = xor i32 %smax61, -1
+ ret i32 %tmp14
+}
diff --git a/llvm/test/Transforms/InstCombine/pr38915.ll b/llvm/test/Transforms/InstCombine/pr38915.ll
new file mode 100644
index 00000000000..c23bf4a3933
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr38915.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+define i32 @PR38915(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @PR38915(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[M1N:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[M1N]], [[Z:%.*]]
+; CHECK-NEXT: [[M2:%.*]] = select i1 [[C2]], i32 [[M1N]], i32 [[Z]]
+; CHECK-NEXT: [[M2N:%.*]] = xor i32 [[M2]], -1
+; CHECK-NEXT: ret i32 [[M2N]]
+;
+ %xn = sub i32 0, %x
+ %yn = sub i32 0, %y
+ %c1 = icmp sgt i32 %xn, %yn
+ %m1 = select i1 %c1, i32 %xn, i32 %yn
+ %m1n = xor i32 %m1, -1
+ %c2 = icmp sgt i32 %m1n, %z
+ %m2 = select i1 %c2, i32 %m1n, i32 %z
+ %m2n = xor i32 %m2, -1
+ ret i32 %m2n
+}
diff --git a/llvm/test/Transforms/InstCombine/pr38984.ll b/llvm/test/Transforms/InstCombine/pr38984.ll
new file mode 100644
index 00000000000..1334042d4a1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr38984.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "p:16:16"
+
+@a = external global [21 x i16], align 1
+@offsets = external global [4 x i16], align 1
+
+; The "same gep" optimization should work with vector icmp.
+define <4 x i1> @PR38984_1() {
+; CHECK-LABEL: @PR38984_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+ %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 1
+ %1 = insertelement <4 x i16> undef, i16 %0, i32 3
+ %2 = getelementptr i32, i32* null, <4 x i16> %1
+ %3 = getelementptr i32, i32* null, <4 x i16> %1
+ %4 = icmp eq <4 x i32*> %2, %3
+ ret <4 x i1> %4
+}
+
+; The "compare base pointers" optimization should not kick in for vector icmp.
+define <4 x i1> @PR38984_2() {
+; CHECK-LABEL: @PR38984_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef), align 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, i16* getelementptr inbounds ([21 x i16], [21 x i16]* @a, i16 1, i16 0), <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, i16* null, <4 x i16> [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i16*> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret <4 x i1> [[TMP4]]
+;
+entry:
+ %0 = load i16, i16* getelementptr ([4 x i16], [4 x i16]* @offsets, i16 0, i16 undef)
+ %1 = insertelement <4 x i16> undef, i16 %0, i32 3
+ %2 = getelementptr i16, i16* getelementptr ([21 x i16], [21 x i16]* @a, i64 1, i32 0), <4 x i16> %1
+ %3 = getelementptr i16, i16* null, <4 x i16> %1
+ %4 = icmp eq <4 x i16*> %2, %3
+ ret <4 x i1> %4
+}
diff --git a/llvm/test/Transforms/InstCombine/pr39177.ll b/llvm/test/Transforms/InstCombine/pr39177.ll
new file mode 100644
index 00000000000..35c5ce0d3f6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr39177.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@stderr = external global %struct._IO_FILE*, align 8
+@.str = private constant [8 x i8] c"crash!\0A\00", align 1
+
+@fwrite = alias i64 (i8*, i64, i64, %struct._IO_FILE*), i64 (i8*, i64, i64, %struct._IO_FILE*)* @__fwrite_alias
+
+define i64 @__fwrite_alias(i8* %ptr, i64 %size, i64 %n, %struct._IO_FILE* %s) {
+; CHECK-LABEL: @__fwrite_alias(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 0
+;
+entry:
+ %ptr.addr = alloca i8*, align 8
+ %size.addr = alloca i64, align 8
+ %n.addr = alloca i64, align 8
+ %s.addr = alloca %struct._IO_FILE*, align 8
+ store i8* %ptr, i8** %ptr.addr, align 8
+ store i64 %size, i64* %size.addr, align 8
+ store i64 %n, i64* %n.addr, align 8
+ store %struct._IO_FILE* %s, %struct._IO_FILE** %s.addr, align 8
+ ret i64 0
+}
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @__fwrite_alias(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 7, i64 1, %struct._IO_FILE* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
+ %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0))
+ ret void
+}
+
+declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
diff --git a/llvm/test/Transforms/InstCombine/pr39908.ll b/llvm/test/Transforms/InstCombine/pr39908.ll
new file mode 100644
index 00000000000..bd7a82990ad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr39908.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "p:32:32"
+
+%S = type { [2 x i32] }
+
+define i1 @test([0 x %S]* %p, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %start.cast = bitcast [0 x %S]* %p to %S*
+ %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i32 0, i32 %n, i32 0, i32 0
+ %end.cast = bitcast i32* %end to %S*
+ %last = getelementptr inbounds %S, %S* %end.cast, i32 -1
+ %cmp = icmp eq %S* %last, %start.cast
+ ret i1 %cmp
+}
+
+; Same test using 64-bit indices.
+define i1 @test64([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %start.cast = bitcast [0 x %S]* %p to %S*
+ %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 0
+ %end.cast = bitcast i32* %end to %S*
+ %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+ %cmp = icmp eq %S* %last, %start.cast
+ ret i1 %cmp
+}
+
+; Here the offset overflows and is treated modulo 2^32: the final index is
+; 8589934592 = 2^33, so its byte offset wraps to 0 in this 32-bit address space. This is UB.
+define i1 @test64_overflow([0 x %S]* %p, i64 %n) {
+; CHECK-LABEL: @test64_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %start.cast = bitcast [0 x %S]* %p to %S*
+ %end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 8589934592
+ %end.cast = bitcast i32* %end to %S*
+ %last = getelementptr inbounds %S, %S* %end.cast, i64 -1
+ %cmp = icmp eq %S* %last, %start.cast
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/pr41164.ll b/llvm/test/Transforms/InstCombine/pr41164.ll
new file mode 100644
index 00000000000..372debab8ec
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr41164.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+@wyhash64_x = global i64 0, align 8
+
+define i64 @_Z8wyhash64v() {
+; CHECK-LABEL: @_Z8wyhash64v(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @wyhash64_x, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6971258582664805397
+; CHECK-NEXT: store i64 [[TMP2]], i64* @wyhash64_x, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = zext i64 [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i128 [[TMP3]], 11795372955171141389
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP4]], 64
+; CHECK-NEXT: [[DOTMASKED:%.*]] = and i128 [[TMP4]], 18446744073709551615
+; CHECK-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], [[DOTMASKED]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw i128 [[TMP6]], 1946526487930394057
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i128 [[TMP7]], 64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i128 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc i128 [[TMP9]] to i64
+; CHECK-NEXT: ret i64 [[TMP10]]
+;
+ %1 = load i64, i64* @wyhash64_x, align 8
+ %2 = add i64 %1, 6971258582664805397
+ store i64 %2, i64* @wyhash64_x, align 8
+ %3 = zext i64 %2 to i128
+ %4 = mul i128 %3, 11795372955171141389
+ %5 = lshr i128 %4, 64
+ %6 = xor i128 %5, %4
+ %7 = trunc i128 %6 to i64
+ %8 = zext i64 %7 to i128
+ %9 = mul i128 %8, 1946526487930394057
+ %10 = lshr i128 %9, 64
+ %11 = xor i128 %10, %9
+ %12 = trunc i128 %11 to i64
+ ret i64 %12
+}
diff --git a/llvm/test/Transforms/InstCombine/prefetch-load.ll b/llvm/test/Transforms/InstCombine/prefetch-load.ll
new file mode 100644
index 00000000000..f98b7ae00bf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/prefetch-load.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct.C = type { %struct.C*, i32 }
+
+; Check that we instcombine the load across the prefetch.
+
+; CHECK-LABEL: define signext i32 @foo
+define signext i32 @foo(%struct.C* %c) local_unnamed_addr #0 {
+; CHECK: store i32 %dec, i32* %length_
+; CHECK-NOT: load
+; CHECK: llvm.prefetch
+; CHECK-NEXT: ret
+entry:
+ %next_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 0
+ %0 = load %struct.C*, %struct.C** %next_, align 8
+ %next_1 = getelementptr inbounds %struct.C, %struct.C* %0, i32 0, i32 0
+ %1 = load %struct.C*, %struct.C** %next_1, align 8
+ store %struct.C* %1, %struct.C** %next_, align 8
+ %length_ = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 1
+ %2 = load i32, i32* %length_, align 8
+ %dec = add nsw i32 %2, -1
+ store i32 %dec, i32* %length_, align 8
+ %3 = bitcast %struct.C* %1 to i8*
+ call void @llvm.prefetch(i8* %3, i32 0, i32 0, i32 1)
+ %4 = load i32, i32* %length_, align 8
+ ret i32 %4
+}
+
+; Function Attrs: inaccessiblemem_or_argmemonly nounwind
+declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32)
+
+attributes #0 = { noinline nounwind }
+; We've explicitly removed the function attrs from llvm.prefetch so we get the defaults.
+; attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
diff --git a/llvm/test/Transforms/InstCombine/preserve-sminmax.ll b/llvm/test/Transforms/InstCombine/preserve-sminmax.ll
new file mode 100644
index 00000000000..00232ccf318
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/preserve-sminmax.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Instcombine normally would fold the sdiv into the comparison,
+; making "icmp slt i32 %h, 2", but in this case the sdiv has
+; another use, so it wouldn't be a big win, and it would also
+; obfuscate an otherwise obvious smax pattern to the point where
+; other analyses wouldn't recognize it.
+
+define i32 @foo(i32 %h) {
+ %sd = sdiv i32 %h, 2
+ %t = icmp slt i32 %sd, 1
+ %r = select i1 %t, i32 %sd, i32 1
+ ret i32 %r
+}
+
+; CHECK: %sd = sdiv i32 %h, 2
+; CHECK: %t = icmp slt i32 %sd, 1
+; CHECK: %r = select i1 %t, i32 %sd, i32 1
+; CHECK: ret i32 %r
+
+define i32 @bar(i32 %h) {
+ %sd = sdiv i32 %h, 2
+ %t = icmp sgt i32 %sd, 1
+ %r = select i1 %t, i32 %sd, i32 1
+ ret i32 %r
+}
+
+; CHECK: %sd = sdiv i32 %h, 2
+; CHECK: %t = icmp sgt i32 %sd, 1
+; CHECK: %r = select i1 %t, i32 %sd, i32 1
+; CHECK: ret i32 %r
+
diff --git a/llvm/test/Transforms/InstCombine/preserved-analyses.ll b/llvm/test/Transforms/InstCombine/preserved-analyses.ll
new file mode 100644
index 00000000000..767304aecf3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/preserved-analyses.ll
@@ -0,0 +1,33 @@
+; This is really testing that instcombine preserves analyses correctly, so we
+; don't care much about the code other than it is something instcombine can
+; transform.
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 -aa-pipeline=basic-aa,globals-aa \
+; RUN: -passes='require<globals-aa>,function(require<aa>,instcombine),function(require<aa>)' \
+; RUN: | FileCheck %s --check-prefix=AA
+; AA: Running analysis: GlobalsAA
+; AA: Running analysis: AAManager
+; AA: Running analysis: BasicAA
+; AA: Running pass: InstCombinePass on test
+; AA-NOT: Invalidating analysis: GlobalsAA
+; AA-NOT: Invalidating analysis: AAManager
+; AA-NOT: Invalidating analysis: BasicAA
+; AA: Running pass: RequireAnalysisPass<{{.*}}AAManager
+; AA-NOT: Running analysis: GlobalsAA
+; AA-NOT: Running analysis: AAManager
+; AA-NOT: Running analysis: BasicAA
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
+; RUN: -passes='require<domtree>,instcombine,require<domtree>' \
+; RUN: | FileCheck %s --check-prefix=DT
+; DT: Running analysis: DominatorTreeAnalysis
+; DT: Running pass: InstCombinePass on test
+; DT-NOT: Invalidating analysis: DominatorTreeAnalysis
+; DT: Running pass: RequireAnalysisPass<{{.*}}DominatorTreeAnalysis
+; DT-NOT: Running analysis: DominatorTreeAnalysis
+
+define i32 @test(i32 %A) {
+ %B = add i32 %A, 5
+ %C = add i32 %B, -5
+ ret i32 %C
+}
diff --git a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
new file mode 100644
index 00000000000..ab37c7d5623
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This test makes sure that InstCombine does not fold the sequence of an
+; xor/sub instruction followed by a cmp instruction into a single cmp
+; instruction when the xor/sub has more than one use.
+
+define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %xor = xor i32 %lhs, 5
+; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 10
+
+ %xor = xor i32 %lhs, 5
+ %cmp1 = icmp eq i32 %xor, 10
+ %cmp2 = icmp eq i32 %xor, %rhs
+ %sel = or i1 %cmp1, %cmp2
+ ret i1 %sel
+}
+
+define zeroext i1 @test2(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %xor = xor i32 %lhs, %rhs
+; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 0
+
+ %xor = xor i32 %lhs, %rhs
+ %cmp1 = icmp eq i32 %xor, 0
+ %cmp2 = icmp eq i32 %xor, 32
+ %sel = xor i1 %cmp1, %cmp2
+ ret i1 %sel
+}
+
+define zeroext i1 @test3(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %sub = sub nsw i32 %lhs, %rhs
+; CHECK-NEXT: %cmp1 = icmp eq i32 %sub, 0
+
+ %sub = sub nsw i32 %lhs, %rhs
+ %cmp1 = icmp eq i32 %sub, 0
+ %cmp2 = icmp eq i32 %sub, 31
+ %sel = or i1 %cmp1, %cmp2
+ ret i1 %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/printf-1.ll b/llvm/test/Transforms/InstCombine/printf-1.ll
new file mode 100644
index 00000000000..9d13b360f81
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/printf-1.ll
@@ -0,0 +1,131 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@h2 = constant [3 x i8] c"%%\00"
+@percent = constant [2 x i8] c"%\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+@empty = constant [1 x i8] c"\00"
+; CHECK: [[$STR:@[a-z0-9]+]] = private unnamed_addr constant [12 x i8] c"hello world\00", align 1
+
+declare i32 @printf(i8*, ...)
+
+; Check printf("") -> noop.
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %fmt = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("x") -> putchar('x'), even for '%'.
+
+define void @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Special case: printf("%%") -> putchar('%').
+
+define void @test_simplify2b() {
+; CHECK-LABEL: @test_simplify2b(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @h2, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %fmt = getelementptr [2 x i8], [2 x i8]* @percent, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("foo\n") -> puts("foo").
+
+define void @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* [[$STR]], i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%c", chr) -> putchar(chr).
+
+define void @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%s\n", str) -> puts(str).
+
+define void @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+ %fmt = getelementptr [4 x i8], [4 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf(format, ...) -> iprintf(format, ...) if no floating point.
+
+define void @test_simplify7() {
+; CHECK-IPRINTF-LABEL: @test_simplify7(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...) @iprintf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify2(i8* %fmt, double %d) {
+; CHECK-LABEL: @test_no_simplify2(
+ call i32 (i8*, ...) @printf(i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, ...) @printf(i8* %fmt, double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define i32 @test_no_simplify3() {
+; CHECK-LABEL: @test_no_simplify3(
+ %fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
+ %ret = call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @h, i32 0, i32 0))
+ ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/printf-2.ll b/llvm/test/Transforms/InstCombine/printf-2.ll
new file mode 100644
index 00000000000..fbd5b1bb96c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/printf-2.ll
@@ -0,0 +1,53 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+@format_str = constant [3 x i8] c"%s\00"
+@charstr = constant [2 x i8] c"a\00"
+
+declare void @printf(i8*, ...)
+
+; Check simplification of printf with void return type.
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %fmt = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
+ call void (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call void (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+ %fmt = getelementptr [4 x i8], [4 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call void (i8*, ...) @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @format_str, i32 0, i32 0
+ %str = getelementptr [2 x i8], [2 x i8]* @charstr, i32 0, i32 0
+ call void (i8*, ...) @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 97)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/printf-3.ll b/llvm/test/Transforms/InstCombine/printf-3.ll
new file mode 100644
index 00000000000..8f3a36a1346
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/printf-3.ll
@@ -0,0 +1,39 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+@.str = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0)) [ "funclet"(token %cp) ]
+ catchret from %cp to label %try.cont
+
+try.cont:
+ ret void
+
+unreachable:
+ unreachable
+}
+
+; CHECK-DAG: define void @test1(
+; CHECK: %[[CS:.*]] = catchswitch within none
+; CHECK: %[[CP:.*]] = catchpad within %[[CS]] [i8* null, i32 64, i8* null]
+; CHECK: call i32 @putchar(i32 10) [ "funclet"(token %[[CP]]) ]
+
+declare void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare i32 @printf(i8*, ...)
diff --git a/llvm/test/Transforms/InstCombine/ptr-int-cast.ll b/llvm/test/Transforms/InstCombine/ptr-int-cast.ll
new file mode 100644
index 00000000000..826c0048422
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+define i1 @test1(i32 *%x) nounwind {
+entry:
+; CHECK: test1
+; CHECK: ptrtoint i32* %x to i64
+ %0 = ptrtoint i32* %x to i1
+ ret i1 %0
+}
+
+define i32* @test2(i128 %x) nounwind {
+entry:
+; CHECK: test2
+; CHECK: inttoptr i64 %0 to i32*
+ %0 = inttoptr i128 %x to i32*
+ ret i32* %0
+}
+
+; PR3574
+; CHECK: f0
+; CHECK: %1 = zext i32 %a0 to i64
+; CHECK: ret i64 %1
+define i64 @f0(i32 %a0) nounwind {
+ %t0 = inttoptr i32 %a0 to i8*
+ %t1 = ptrtoint i8* %t0 to i64
+ ret i64 %t1
+}
+
+define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: trunc <4 x i64> %1 to <4 x i32>
+ %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+ ret <4 x i32> %p1
+}
+
+define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
+; CHECK-LABEL: @test5(
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: zext <4 x i64> %1 to <4 x i128>
+ %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
+ ret <4 x i128> %p1
+}
+
+define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK: zext <4 x i32> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+ %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
+ ret <4 x i8*> %p1
+}
+
+define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
+; CHECK-LABEL: @test7(
+; CHECK: trunc <4 x i128> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+ %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
+ ret <4 x i8*> %p1
+}
diff --git a/llvm/test/Transforms/InstCombine/puts-1.ll b/llvm/test/Transforms/InstCombine/puts-1.ll
new file mode 100644
index 00000000000..21028684851
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/puts-1.ll
@@ -0,0 +1,31 @@
+; Test that the puts library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@empty = constant [1 x i8] zeroinitializer
+
+declare i32 @puts(i8*)
+
+; Check puts("") -> putchar('\n').
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 10)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Don't simplify if the return value is used.
+
+define i32 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @empty, i32 0, i32 0))
+ ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/range-check.ll b/llvm/test/Transforms/InstCombine/range-check.ll
new file mode 100644
index 00000000000..35f11dd39ef
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/range-check.ll
@@ -0,0 +1,159 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check simplification of
+; (icmp sgt x, -1) & (icmp sgt/sge n, x) --> icmp ugt/uge n, x
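+;
+; The fold is valid because %nn is masked with 0x7fffffff and therefore known
+; non-negative: any negative x is >= 2^31 as an unsigned value and so exceeds
+; %nn, which lets a single unsigned compare against %nn reject negative x and
+; enforce the upper bound at the same time.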
+
+; CHECK-LABEL: define i1 @test_and1
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and1(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sge i32 %x, 0
+ %b = icmp slt i32 %x, %nn
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and2
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and2(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sgt i32 %x, -1
+ %b = icmp sle i32 %x, %nn
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and3
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and3(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sgt i32 %nn, %x
+ %b = icmp sge i32 %x, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and4
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and4(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sge i32 %nn, %x
+ %b = icmp sge i32 %x, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or1
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or1(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp slt i32 %x, 0
+ %b = icmp sge i32 %x, %nn
+ %c = or i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or2
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or2(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sle i32 %x, -1
+ %b = icmp sgt i32 %x, %nn
+ %c = or i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or3
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or3(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp sle i32 %nn, %x
+ %b = icmp slt i32 %x, 0
+ %c = or i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or4
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or4(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp slt i32 %nn, %x
+ %b = icmp slt i32 %x, 0
+ %c = or i1 %a, %b
+ ret i1 %c
+}
+
+; Negative tests
+
+; CHECK-LABEL: define i1 @negative1
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative1(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp slt i32 %x, %nn
+ %b = icmp sgt i32 %x, 0 ; should be: icmp sge
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative2
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative2(i32 %x, i32 %n) {
+ %a = icmp slt i32 %x, %n ; n can be negative
+ %b = icmp sge i32 %x, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative3
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative3(i32 %x, i32 %y, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp slt i32 %x, %nn
+ %b = icmp sge i32 %y, 0 ; should compare %x and not %y
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative4
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative4(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp ne i32 %x, %nn ; should be: icmp slt/sle
+ %b = icmp sge i32 %x, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative5
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = or i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative5(i32 %x, i32 %n) {
+ %nn = and i32 %n, 2147483647
+ %a = icmp slt i32 %x, %nn
+ %b = icmp sge i32 %x, 0
+ %c = or i1 %a, %b ; should be: and
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/readnone-maythrow.ll b/llvm/test/Transforms/InstCombine/readnone-maythrow.ll
new file mode 100644
index 00000000000..f01e90263a3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/readnone-maythrow.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @readnone_but_may_throw() readnone
+
+define void @f_0(i32* %ptr) {
+; CHECK-LABEL: @f_0(
+entry:
+; CHECK: store i32 10, i32* %ptr
+; CHECK-NEXT: call void @readnone_but_may_throw()
+; CHECK-NEXT: store i32 20, i32* %ptr, align 4
+; CHECK: ret void
+
+ store i32 10, i32* %ptr
+ call void @readnone_but_may_throw()
+ store i32 20, i32* %ptr
+ ret void
+}
+
+define void @f_1(i1 %cond, i32* %ptr) {
+; CHECK-LABEL: @f_1(
+; CHECK: store i32 10, i32* %ptr
+; CHECK-NEXT: call void @readnone_but_may_throw()
+
+ store i32 10, i32* %ptr
+ call void @readnone_but_may_throw()
+ br i1 %cond, label %left, label %merge
+
+left:
+ store i32 20, i32* %ptr
+ br label %merge
+
+merge:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/realloc.ll b/llvm/test/Transforms/InstCombine/realloc.ll
new file mode 100644
index 00000000000..22f37f1d2fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/realloc.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i8* @realloc(i8*, i64) #1
+declare noalias i8* @malloc(i64) #1
+
+
+define i8* @realloc_null_ptr() #0 {
+; CHECK-LABEL: @realloc_null_ptr(
+; CHECK-NEXT: [[MALLOC:%.*]] = call i8* @malloc(i64 100)
+; CHECK-NEXT: ret i8* [[MALLOC]]
+;
+ %call = call i8* @realloc(i8* null, i64 100) #2
+ ret i8* %call
+}
+
+define i8* @realloc_unknown_ptr(i8* %ptr) #0 {
+; CHECK-LABEL: @realloc_unknown_ptr(
+; CHECK-NEXT: [[CALL:%.*]] = call i8* @realloc(i8* [[PTR:%.*]], i64 100)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = call i8* @realloc(i8* %ptr, i64 100) #2
+ ret i8* %call
+}
diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll
new file mode 100644
index 00000000000..200e0389fd7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/rem.ll
@@ -0,0 +1,672 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @rem_signed(i64 %x1, i64 %y2) {
+; CHECK-LABEL: @rem_signed(
+; CHECK-NEXT: [[TMP1:%.*]] = srem i64 [[X1:%.*]], [[Y2:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %r = sdiv i64 %x1, %y2
+ %r7 = mul i64 %r, %y2
+ %r8 = sub i64 %x1, %r7
+ ret i64 %r8
+}
+
+define <4 x i32> @rem_signed_vec(<4 x i32> %t, <4 x i32> %u) {
+; CHECK-LABEL: @rem_signed_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> [[T:%.*]], [[U:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %k = sdiv <4 x i32> %t, %u
+ %l = mul <4 x i32> %k, %u
+ %m = sub <4 x i32> %t, %l
+ ret <4 x i32> %m
+}
+
+define i64 @rem_unsigned(i64 %x1, i64 %y2) {
+; CHECK-LABEL: @rem_unsigned(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i64 [[X1:%.*]], [[Y2:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %r = udiv i64 %x1, %y2
+ %r7 = mul i64 %r, %y2
+ %r8 = sub i64 %x1, %r7
+ ret i64 %r8
+}
+
+; PR28672 - https://llvm.org/bugs/show_bug.cgi?id=28672
+
+define i8 @big_divisor(i8 %x) {
+; CHECK-LABEL: @big_divisor(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], -127
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[X]], 127
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[TMP2]]
+; CHECK-NEXT: ret i8 [[REM]]
+;
+ %rem = urem i8 %x, 129
+ ret i8 %rem
+}
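+
+; Rationale for the fold above: 129 is more than half of the unsigned i8 range,
+; so the quotient of x / 129 is either 0 or 1. The remainder is therefore x
+; itself when x u< 129 (-127 as an unsigned i8) and x - 129 (x + 127 modulo 256)
+; otherwise, which is exactly the compare/add/select sequence checked above.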
+
+define i5 @biggest_divisor(i5 %x) {
+; CHECK-LABEL: @biggest_divisor(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i5 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i5
+; CHECK-NEXT: [[REM:%.*]] = add i5 [[TMP2]], [[X]]
+; CHECK-NEXT: ret i5 [[REM]]
+;
+ %rem = urem i5 %x, -1
+ ret i5 %rem
+}
+
+define i8 @urem_with_sext_bool_divisor(i1 %x, i8 %y) {
+; CHECK-LABEL: @urem_with_sext_bool_divisor(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[REM:%.*]] = select i1 [[TMP1]], i8 0, i8 [[Y]]
+; CHECK-NEXT: ret i8 [[REM]]
+;
+ %s = sext i1 %x to i8
+ %rem = urem i8 %y, %s
+ ret i8 %rem
+}
+
+define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) {
+; CHECK-LABEL: @urem_with_sext_bool_divisor_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> zeroinitializer, <2 x i8> [[Y]]
+; CHECK-NEXT: ret <2 x i8> [[REM]]
+;
+ %s = sext <2 x i1> %x to <2 x i8>
+ %rem = urem <2 x i8> %y, %s
+ ret <2 x i8> %rem
+}
+
+define <2 x i4> @big_divisor_vec(<2 x i4> %x) {
+; CHECK-LABEL: @big_divisor_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i4> [[X:%.*]], <i4 -3, i4 -3>
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i4> [[X]], <i4 3, i4 3>
+; CHECK-NEXT: [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i4> [[X]], <2 x i4> [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[REM]]
+;
+ %rem = urem <2 x i4> %x, <i4 13, i4 13>
+ ret <2 x i4> %rem
+}
+
+define i8 @urem1(i8 %x, i8 %y) {
+; CHECK-LABEL: @urem1(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %A = udiv i8 %x, %y
+ %B = mul i8 %A, %y
+ %C = sub i8 %x, %B
+ ret i8 %C
+}
+
+define i8 @srem1(i8 %x, i8 %y) {
+; CHECK-LABEL: @srem1(
+; CHECK-NEXT: [[TMP1:%.*]] = srem i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %A = sdiv i8 %x, %y
+ %B = mul i8 %A, %y
+ %C = sub i8 %x, %B
+ ret i8 %C
+}
+
+define i8 @urem2(i8 %x, i8 %y) {
+; CHECK-LABEL: @urem2(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = sub i8 0, [[TMP1]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %A = udiv i8 %x, %y
+ %B = mul i8 %A, %y
+ %C = sub i8 %B, %x
+ ret i8 %C
+}
+
+define i8 @urem3(i8 %x) {
+; CHECK-LABEL: @urem3(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X:%.*]], 3
+; CHECK-NEXT: [[B1:%.*]] = sub i8 [[X]], [[TMP1]]
+; CHECK-NEXT: [[C:%.*]] = add i8 [[B1]], [[X]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %A = udiv i8 %x, 3
+ %B = mul i8 %A, -3
+ %C = sub i8 %x, %B
+ ret i8 %C
+}
+
+; (((X / Y) * Y) / Y) -> X / Y
+
+define i32 @sdiv_mul_sdiv(i32 %x, i32 %y) {
+; CHECK-LABEL: @sdiv_mul_sdiv(
+; CHECK-NEXT: [[R:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %div = sdiv i32 %x, %y
+ %mul = mul i32 %div, %y
+ %r = sdiv i32 %mul, %y
+ ret i32 %r
+}
+
+; (((X / Y) * Y) / Y) -> X / Y
+
+define i32 @udiv_mul_udiv(i32 %x, i32 %y) {
+; CHECK-LABEL: @udiv_mul_udiv(
+; CHECK-NEXT: [[R:%.*]] = udiv i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %div = udiv i32 %x, %y
+ %mul = mul i32 %div, %y
+ %r = udiv i32 %mul, %y
+ ret i32 %r
+}
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 0
+;
+ %B = srem i32 %A, 1 ; is a constant 0
+ ret i32 %B
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = urem i32 %A, 8
+ ret i32 %B
+}
+
+define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) {
+; CHECK-LABEL: @vec_power_of_2_constant_splat_divisor(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %B = urem <2 x i32> %A, <i32 8, i32 8>
+ ret <2 x i32> %B
+}
+
+define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) {
+; CHECK-LABEL: @weird_vec_power_of_2_constant_splat_divisor(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i19> [[A:%.*]], <i19 7, i19 7>
+; CHECK-NEXT: ret <2 x i19> [[B]]
+;
+ %B = urem <2 x i19> %A, <i19 8, i19 8>
+ ret <2 x i19> %B
+}
+
+define i1 @test3a(i32 %A) {
+; CHECK-LABEL: @test3a(
+; CHECK-NEXT: [[B1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[B1]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = srem i32 %A, -8
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
+
+define <2 x i1> @test3a_vec(<2 x i32> %A) {
+; CHECK-LABEL: @test3a_vec(
+; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[B1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = srem <2 x i32> %A, <i32 -8, i32 -8>
+ %C = icmp ne <2 x i32> %B, zeroinitializer
+ ret <2 x i1> %C
+}
+
+define i32 @test4(i32 %X, i1 %C) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i32 0, i32 7
+; CHECK-NEXT: [[R:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %V = select i1 %C, i32 1, i32 8
+ %R = urem i32 %X, %V
+ ret i32 %R
+}
+
+define i32 @test5(i32 %X, i8 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[SHIFT_UPGRD_1:%.*]] = zext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[AMT:%.*]] = shl nuw i32 32, [[SHIFT_UPGRD_1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[AMT]], -1
+; CHECK-NEXT: [[V:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %shift.upgrd.1 = zext i8 %B to i32
+ %Amt = shl i32 32, %shift.upgrd.1
+ %V = urem i32 %X, %Amt
+ ret i32 %V
+}
+
+define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i32 undef
+;
+ %B = srem i32 %A, 0 ;; undef
+ ret i32 %B
+}
+
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i32 0
+;
+ %B = mul i32 %A, 8
+ %C = srem i32 %B, 4
+ ret i32 %C
+}
+
+define i32 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i32 0
+;
+ %B = shl i32 %A, 4
+ %C = srem i32 %B, 8
+ ret i32 %C
+}
+
+define i32 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i32 0
+;
+ %B = mul i32 %A, 64
+ %C = urem i32 %B, 32
+ ret i32 %C
+}
+
+define i32 @test10(i8 %c) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 0
+;
+ %tmp.1 = zext i8 %c to i32
+ %tmp.2 = mul i32 %tmp.1, 4
+ %tmp.3 = sext i32 %tmp.2 to i64
+ %tmp.5 = urem i64 %tmp.3, 4
+ %tmp.6 = trunc i64 %tmp.5 to i32
+ ret i32 %tmp.6
+}
+
+define i32 @test11(i32 %i) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
+;
+ %tmp.1 = and i32 %i, -2
+ %tmp.3 = mul i32 %tmp.1, 2
+ %tmp.5 = urem i32 %tmp.3, 4
+ ret i32 %tmp.5
+}
+
+define i32 @test12(i32 %i) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i32 0
+;
+ %tmp.1 = and i32 %i, -4
+ %tmp.5 = srem i32 %tmp.1, 2
+ ret i32 %tmp.5
+}
+
+define i32 @test13(i32 %i) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i32 0
+;
+ %x = srem i32 %i, %i
+ ret i32 %x
+}
+
+define i64 @test14(i64 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SHL]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[ZEXT]], -1
+; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i64 [[UREM]]
+;
+ %shl = shl i32 1, %y
+ %zext = zext i32 %shl to i64
+ %urem = urem i64 %x, %zext
+ ret i64 %urem
+}
+
+define i64 @test15(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[UREM:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: ret i64 [[UREM]]
+;
+ %shl = shl i32 1, %y
+ %zext0 = zext i32 %shl to i64
+ %zext1 = zext i32 %x to i64
+ %urem = urem i64 %zext1, %zext0
+ ret i64 %urem
+}
+
+define i32 @test16(i32 %x, i32 %y) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 11
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], 3
+; CHECK-NEXT: [[REM:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[REM]]
+;
+ %shr = lshr i32 %y, 11
+ %and = and i32 %shr, 4
+ %add = add i32 %and, 4
+ %rem = urem i32 %x, %add
+ ret i32 %rem
+}
+
+define i32 @test17(i32 %X) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %A = urem i32 1, %X
+ ret i32 %A
+}
+
+define i32 @test18(i16 %x, i32 %y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 63, i32 31
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %1 = and i16 %x, 4
+ %2 = icmp ne i16 %1, 0
+ %3 = select i1 %2, i32 32, i32 64
+ %4 = urem i32 %y, %3
+ ret i32 %4
+}
+
+define i32 @test19(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[A:%.*]] = shl i32 1, [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = add i32 [[C]], [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[D]], -1
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %A = shl i32 1, %x
+ %B = shl i32 1, %y
+ %C = and i32 %A, %B
+ %D = add i32 %C, %A
+ %E = urem i32 %y, %D
+ ret i32 %E
+}
+
+define i32 @test19_commutative0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19_commutative0(
+; CHECK-NEXT: [[A:%.*]] = shl i32 1, [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], [[A]]
+; CHECK-NEXT: [[D:%.*]] = add i32 [[C]], [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[D]], -1
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %A = shl i32 1, %x
+ %B = shl i32 1, %y
+ %C = and i32 %B, %A ; swapped
+ %D = add i32 %C, %A
+ %E = urem i32 %y, %D
+ ret i32 %E
+}
+
+define i32 @test19_commutative1(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19_commutative1(
+; CHECK-NEXT: [[A:%.*]] = shl i32 1, [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = and i32 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[D]], -1
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %A = shl i32 1, %x
+ %B = shl i32 1, %y
+ %C = and i32 %A, %B
+ %D = add i32 %A, %C ; swapped
+ %E = urem i32 %y, %D
+ ret i32 %E
+}
+
+define i32 @test19_commutative2(i32 %x, i32 %y) {
+; CHECK-LABEL: @test19_commutative2(
+; CHECK-NEXT: [[A:%.*]] = shl i32 1, [[X:%.*]]
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], [[A]]
+; CHECK-NEXT: [[D:%.*]] = add i32 [[A]], [[C]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[D]], -1
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %A = shl i32 1, %x
+ %B = shl i32 1, %y
+ %C = and i32 %B, %A ; swapped
+ %D = add i32 %A, %C ; swapped
+ %E = urem i32 %y, %D
+ ret i32 %E
+}
+
+define <2 x i64> @test20(<2 x i64> %X, <2 x i1> %C) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i64> <i64 1, i64 2>, <2 x i64> zeroinitializer
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %V = select <2 x i1> %C, <2 x i64> <i64 1, i64 2>, <2 x i64> <i64 8, i64 9>
+ %R = urem <2 x i64> %V, <i64 2, i64 3>
+ ret <2 x i64> %R
+}
+
+define i32 @test21(i1 %c0, i32* %p) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[PHITMP:%.*]] = srem i32 [[V]], 5
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHITMP]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[LHS]]
+;
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %p
+ br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ %rem = srem i32 %lhs, 5
+ ret i32 %rem
+}
+
+@a = common global [5 x i16] zeroinitializer, align 2
+@b = common global i16 0, align 2
+
+define i32 @pr27968_0(i1 %c0, i32* %p) {
+; CHECK-LABEL: @pr27968_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label [[REM_IS_SAFE:%.*]], label [[REM_IS_UNSAFE:%.*]]
+; CHECK: rem.is.safe:
+; CHECK-NEXT: ret i32 0
+; CHECK: rem.is.unsafe:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %p
+ br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = srem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_1(i1 %c0, i1 %always_false, i32* %p) {
+; CHECK-LABEL: @pr27968_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[V]], [[IF_THEN]] ], [ 5, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[ALWAYS_FALSE:%.*]], label [[REM_IS_SAFE:%.*]], label [[REM_IS_UNSAFE:%.*]]
+; CHECK: rem.is.safe:
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[LHS]], -2147483648
+; CHECK-NEXT: ret i32 [[REM]]
+; CHECK: rem.is.unsafe:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %p
+ br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = srem i32 %lhs, -2147483648
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_2(i1 %c0, i32* %p) {
+; CHECK-LABEL: @pr27968_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label [[REM_IS_SAFE:%.*]], label [[REM_IS_UNSAFE:%.*]]
+; CHECK: rem.is.safe:
+; CHECK-NEXT: ret i32 0
+; CHECK: rem.is.unsafe:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %p
+ br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = urem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_3(i1 %c0, i1 %always_false, i32* %p) {
+; CHECK-LABEL: @pr27968_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[PHITMP:%.*]] = and i32 [[V]], 2147483647
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[LHS:%.*]] = phi i32 [ [[PHITMP]], [[IF_THEN]] ], [ 5, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br i1 [[ALWAYS_FALSE:%.*]], label [[REM_IS_SAFE:%.*]], label [[REM_IS_UNSAFE:%.*]]
+; CHECK: rem.is.safe:
+; CHECK-NEXT: ret i32 [[LHS]]
+; CHECK: rem.is.unsafe:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %p
+ br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = urem i32 %lhs, -2147483648
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = urem i32 [[AND]], 2147483647
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %and = and i32 %A, 2147483647
+ %mul = srem i32 %and, 2147483647
+ ret i32 %mul
+}
+
+define <2 x i32> @test23(<2 x i32> %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[MUL:%.*]] = urem <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647>
+ ret <2 x i32> %mul
+}
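; In the two tests above, the 'and' with 2147483647 clears the sign bit of the
; dividend and the divisor is positive, so the signed and unsigned remainders
; agree (e.g. 5 srem 2147483647 == 5 urem 2147483647) and the srem is
; canonicalized to urem.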
+
+; FP division-by-zero is not UB.
+
+define double @PR34870(i1 %cond, double %x, double %y) {
+; CHECK-LABEL: @PR34870(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 0.000000e+00
+; CHECK-NEXT: [[FMOD:%.*]] = frem double [[X:%.*]], [[SEL]]
+; CHECK-NEXT: ret double [[FMOD]]
+;
+ %sel = select i1 %cond, double %y, double 0.0
+ %fmod = frem double %x, %sel
+ ret double %fmod
+}
+
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
new file mode 100644
index 00000000000..6e11c68df92
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -0,0 +1,705 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+; TODO: Canonicalize rotate by constant to funnel shift intrinsics.
+; This should help cost modeling for vectorization, inlining, etc.
+; If a target does not have a rotate instruction, the expansion will
+; be exactly these same 3 basic ops (shl/lshr/or).
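; A sketch of the canonical form the TODO above refers to (assuming that
; canonicalization is implemented): a rotate left by 11 would become a funnel
; shift with the same value used for both data operands, e.g.
;   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 11)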
+
+define i32 @rotl_i32_constant(i32 %x) {
+; CHECK-LABEL: @rotl_i32_constant(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 21
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shl = shl i32 %x, 11
+ %shr = lshr i32 %x, 21
+ %r = or i32 %shr, %shl
+ ret i32 %r
+}
+
+define i42 @rotr_i42_constant(i42 %x) {
+; CHECK-LABEL: @rotr_i42_constant(
+; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[X:%.*]], 31
+; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X]], 11
+; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i42 [[R]]
+;
+ %shl = shl i42 %x, 31
+ %shr = lshr i42 %x, 11
+ %r = or i42 %shr, %shl
+ ret i42 %r
+}
+
+define i8 @rotr_i8_constant_commute(i8 %x) {
+; CHECK-LABEL: @rotr_i8_constant_commute(
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], 3
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %shl = shl i8 %x, 5
+ %shr = lshr i8 %x, 3
+ %r = or i8 %shl, %shr
+ ret i8 %r
+}
+
+define i88 @rotl_i88_constant_commute(i88 %x) {
+; CHECK-LABEL: @rotl_i88_constant_commute(
+; CHECK-NEXT: [[SHL:%.*]] = shl i88 [[X:%.*]], 44
+; CHECK-NEXT: [[SHR:%.*]] = lshr i88 [[X]], 44
+; CHECK-NEXT: [[R:%.*]] = or i88 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i88 [[R]]
+;
+ %shl = shl i88 %x, 44
+ %shr = lshr i88 %x, 44
+ %r = or i88 %shl, %shr
+ ret i88 %r
+}
+
+; Vector types are allowed.
+
+define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
+; CHECK-LABEL: @rotl_v2i16_constant_splat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[X]], <i16 15, i16 15>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %shl = shl <2 x i16> %x, <i16 1, i16 1>
+ %shr = lshr <2 x i16> %x, <i16 15, i16 15>
+ %r = or <2 x i16> %shl, %shr
+ ret <2 x i16> %r
+}
+
+; Non-power-of-2 vector types are allowed.
+
+define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) {
+; CHECK-LABEL: @rotr_v2i17_constant_splat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[X:%.*]], <i17 12, i17 12>
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X]], <i17 5, i17 5>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <2 x i17> [[R]]
+;
+ %shl = shl <2 x i17> %x, <i17 12, i17 12>
+ %shr = lshr <2 x i17> %x, <i17 5, i17 5>
+ %r = or <2 x i17> %shr, %shl
+ ret <2 x i17> %r
+}
+
+; Allow arbitrary shift constants.
+
+define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 17, i32 19>
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X]], <i32 15, i32 13>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %shl = shl <2 x i32> %x, <i32 17, i32 19>
+ %shr = lshr <2 x i32> %x, <i32 15, i32 13>
+ %r = or <2 x i32> %shl, %shr
+ ret <2 x i32> %r
+}
+
+define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
+; CHECK-LABEL: @rotl_v2i36_constant_nonsplat(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i36> [[X:%.*]], <i36 21, i36 11>
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i36> [[X]], <i36 15, i36 25>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i36> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <2 x i36> [[R]]
+;
+ %shl = shl <2 x i36> %x, <i36 21, i36 11>
+ %shr = lshr <2 x i36> %x, <i36 15, i36 25>
+ %r = or <2 x i36> %shl, %shr
+ ret <2 x i36> %r
+}
+
+; The most basic rotate by variable - no guards for UB due to oversized shifts.
+; This cannot be canonicalized to funnel shift target-independently. The safe
+; expansion includes masking of the shift amount, which is not done here,
+; so it could be more expensive.
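; For illustration: if this were converted to
;   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
; a target without a rotate instruction would have to expand it roughly as
;   %negy     = sub i32 0, %y
;   %ymask    = and i32 %y, 31
;   %negymask = and i32 %negy, 31
;   %shl      = shl i32 %x, %ymask
;   %shr      = lshr i32 %x, %negymask
;   %r        = or i32 %shl, %shr
; i.e. with two extra 'and' masks compared to the unguarded pattern below.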
+
+define i32 @rotl_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_i32(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %sub = sub i32 32, %y
+ %shl = shl i32 %x, %y
+ %shr = lshr i32 %x, %sub
+ %r = or i32 %shr, %shl
+ ret i32 %r
+}
+
+; Non-power-of-2 types should follow the same reasoning. Left/right is determined by subtract.
+
+define i37 @rotr_i37(i37 %x, i37 %y) {
+; CHECK-LABEL: @rotr_i37(
+; CHECK-NEXT: [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or i37 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i37 [[R]]
+;
+ %sub = sub i37 37, %y
+ %shl = shl i37 %x, %sub
+ %shr = lshr i37 %x, %y
+ %r = or i37 %shr, %shl
+ ret i37 %r
+}
+
+; Commute 'or' operands.
+
+define i8 @rotr_i8_commute(i8 %x, i8 %y) {
+; CHECK-LABEL: @rotr_i8_commute(
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %sub = sub i8 8, %y
+ %shl = shl i8 %x, %sub
+ %shr = lshr i8 %x, %y
+ %r = or i8 %shl, %shr
+ ret i8 %r
+}
+
+; Vector types should follow the same rules.
+
+define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @rotl_v4i32(
+; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+ %shl = shl <4 x i32> %x, %y
+ %shr = lshr <4 x i32> %x, %sub
+ %r = or <4 x i32> %shl, %shr
+ ret <4 x i32> %r
+}
+
+; Non-power-of-2 vector types should follow the same rules.
+
+define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
+; CHECK-LABEL: @rotr_v3i42(
+; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> <i42 42, i42 42, i42 42>, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
+; CHECK-NEXT: ret <3 x i42> [[R]]
+;
+ %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
+ %shl = shl <3 x i42> %x, %sub
+ %shr = lshr <3 x i42> %x, %y
+ %r = or <3 x i42> %shr, %shl
+ ret <3 x i42> %r
+}
+
+; This is the canonical pattern for a UB-safe rotate-by-variable with power-of-2-size scalar type.
+; The backend expansion of funnel shift for targets that don't have a rotate instruction should
+; match the original IR, so it is always good to canonicalize to the intrinsics for this pattern.
+
+define i32 @rotl_safe_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_safe_i32(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %negy = sub i32 0, %y
+ %ymask = and i32 %y, 31
+ %negymask = and i32 %negy, 31
+ %shl = shl i32 %x, %ymask
+ %shr = lshr i32 %x, %negymask
+ %r = or i32 %shr, %shl
+ ret i32 %r
+}
+
+; Extra uses don't change anything.
+
+define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
+; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
+; CHECK-NEXT: store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %negy = sub i16 0, %y
+ %ymask = and i16 %y, 15
+ %negymask = and i16 %negy, 15
+ store i16 %negymask, i16* %p
+ %shl = shl i16 %x, %ymask
+ %shr = lshr i16 %x, %negymask
+ %r = or i16 %shl, %shr
+ ret i16 %r
+}
+
+; Left/right is determined by the negation.
+
+define i64 @rotr_safe_i64(i64 %x, i64 %y) {
+; CHECK-LABEL: @rotr_safe_i64(
+; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %negy = sub i64 0, %y
+ %ymask = and i64 %y, 63
+ %negymask = and i64 %negy, 63
+ %shl = shl i64 %x, %negymask
+ %shr = lshr i64 %x, %ymask
+ %r = or i64 %shr, %shl
+ ret i64 %r
+}
+
+; Extra uses don't change anything.
+
+define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
+; CHECK-LABEL: @rotr_safe_i8_commute_extra_use(
+; CHECK-NEXT: [[NEGY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[YMASK:%.*]] = and i8 [[Y]], 7
+; CHECK-NEXT: [[NEGYMASK:%.*]] = and i8 [[NEGY]], 7
+; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[NEGYMASK]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[YMASK]]
+; CHECK-NEXT: store i8 [[SHR]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %negy = sub i8 0, %y
+ %ymask = and i8 %y, 7
+ %negymask = and i8 %negy, 7
+ %shl = shl i8 %x, %negymask
+ %shr = lshr i8 %x, %ymask
+ store i8 %shr, i8* %p
+ %r = or i8 %shl, %shr
+ ret i8 %r
+}
+
+; Vectors follow the same rules.
+
+define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @rotl_safe_v2i32(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %negy = sub <2 x i32> zeroinitializer, %y
+ %ymask = and <2 x i32> %y, <i32 31, i32 31>
+ %negymask = and <2 x i32> %negy, <i32 31, i32 31>
+ %shl = shl <2 x i32> %x, %ymask
+ %shr = lshr <2 x i32> %x, %negymask
+ %r = or <2 x i32> %shr, %shl
+ ret <2 x i32> %r
+}
+
+; Vectors follow the same rules.
+
+define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
+; CHECK-LABEL: @rotr_safe_v3i16(
+; CHECK-NEXT: [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
+; CHECK-NEXT: ret <3 x i16> [[R]]
+;
+ %negy = sub <3 x i16> zeroinitializer, %y
+ %ymask = and <3 x i16> %y, <i16 15, i16 15, i16 15>
+ %negymask = and <3 x i16> %negy, <i16 15, i16 15, i16 15>
+ %shl = shl <3 x i16> %x, %negymask
+ %shr = lshr <3 x i16> %x, %ymask
+ %r = or <3 x i16> %shr, %shl
+ ret <3 x i16> %r
+}
+
+; These are optionally UB-free rotate left/right patterns that are narrowed to a smaller bitwidth.
+; See PR34046, PR16726, and PR39624 for motivating examples:
+; https://bugs.llvm.org/show_bug.cgi?id=34046
+; https://bugs.llvm.org/show_bug.cgi?id=16726
+; https://bugs.llvm.org/show_bug.cgi?id=39624
+
+define i16 @rotate_left_16bit(i16 %v, i32 %shift) {
+; CHECK-LABEL: @rotate_left_16bit(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i16
+; CHECK-NEXT: [[CONV2:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[CONV2]]
+;
+ %and = and i32 %shift, 15
+ %conv = zext i16 %v to i32
+ %shl = shl i32 %conv, %and
+ %sub = sub i32 16, %and
+ %shr = lshr i32 %conv, %sub
+ %or = or i32 %shr, %shl
+ %conv2 = trunc i32 %or to i16
+ ret i16 %conv2
+}
+
+; Commute the 'or' operands and try a vector type.
+
+define <2 x i16> @rotate_left_commute_16bit_vec(<2 x i16> %v, <2 x i32> %shift) {
+; CHECK-LABEL: @rotate_left_commute_16bit_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[SHIFT:%.*]] to <2 x i16>
+; CHECK-NEXT: [[CONV2:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[V:%.*]], <2 x i16> [[V]], <2 x i16> [[TMP1]])
+; CHECK-NEXT: ret <2 x i16> [[CONV2]]
+;
+ %and = and <2 x i32> %shift, <i32 15, i32 15>
+ %conv = zext <2 x i16> %v to <2 x i32>
+ %shl = shl <2 x i32> %conv, %and
+ %sub = sub <2 x i32> <i32 16, i32 16>, %and
+ %shr = lshr <2 x i32> %conv, %sub
+ %or = or <2 x i32> %shl, %shr
+ %conv2 = trunc <2 x i32> %or to <2 x i16>
+ ret <2 x i16> %conv2
+}
+
+; Change the size, rotation direction (the subtract is on the left-shift), and mask op.
+
+define i8 @rotate_right_8bit(i8 %v, i3 %shift) {
+; CHECK-LABEL: @rotate_right_8bit(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[SHIFT:%.*]] to i8
+; CHECK-NEXT: [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
+; CHECK-NEXT: ret i8 [[CONV2]]
+;
+ %and = zext i3 %shift to i32
+ %conv = zext i8 %v to i32
+ %shr = lshr i32 %conv, %and
+ %sub = sub i32 8, %and
+ %shl = shl i32 %conv, %sub
+ %or = or i32 %shl, %shr
+ %conv2 = trunc i32 %or to i8
+ ret i8 %conv2
+}
+
+; The shifted value does not need to be a zexted value; here it is masked.
+; The shift mask could be less than the bitwidth, but this is still ok.
+
+define i8 @rotate_right_commute_8bit(i32 %v, i32 %shift) {
+; CHECK-LABEL: @rotate_right_commute_8bit(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHIFT:%.*]] to i8
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[V:%.*]] to i8
+; CHECK-NEXT: [[CONV2:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP3]], i8 [[TMP3]], i8 [[TMP2]])
+; CHECK-NEXT: ret i8 [[CONV2]]
+;
+ %and = and i32 %shift, 3
+ %conv = and i32 %v, 255
+ %shr = lshr i32 %conv, %and
+ %sub = sub i32 8, %and
+ %shl = shl i32 %conv, %sub
+ %or = or i32 %shr, %shl
+ %conv2 = trunc i32 %or to i8
+ ret i8 %conv2
+}
+
+; If the original source does not mask the shift amount,
+; we still do the transform by adding masks to make it safe.
+
+define i8 @rotate8_not_safe(i8 %v, i32 %shamt) {
+; CHECK-LABEL: @rotate8_not_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %conv = zext i8 %v to i32
+ %sub = sub i32 8, %shamt
+ %shr = lshr i32 %conv, %sub
+ %shl = shl i32 %conv, %shamt
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i8
+ ret i8 %ret
+}
+
+; A non-power-of-2 destination type can't be masked as above.
+
+define i9 @rotate9_not_safe(i9 %v, i32 %shamt) {
+; CHECK-LABEL: @rotate9_not_safe(
+; CHECK-NEXT: [[CONV:%.*]] = zext i9 [[V:%.*]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 9, [[SHAMT:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[CONV]], [[SUB]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SHAMT]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[OR]] to i9
+; CHECK-NEXT: ret i9 [[RET]]
+;
+ %conv = zext i9 %v to i32
+ %sub = sub i32 9, %shamt
+ %shr = lshr i32 %conv, %sub
+ %shl = shl i32 %conv, %shamt
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i9
+ ret i9 %ret
+}
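; Worked example of why a non-power-of-2 width cannot be masked: for i9 the
; shift amount must be reduced modulo 9, and 10 urem 9 == 1 while 10 & 8 == 8,
; so no single 'and' mask provides the required modulo.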
+
+; We should narrow (v << (s & 15)) | (v >> (-s & 15))
+; when both v and s have been promoted.
+
+define i16 @rotateleft_16_neg_mask(i16 %v, i16 %shamt) {
+; CHECK-LABEL: @rotateleft_16_neg_mask(
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %neg = sub i16 0, %shamt
+ %lshamt = and i16 %shamt, 15
+ %lshamtconv = zext i16 %lshamt to i32
+ %rshamt = and i16 %neg, 15
+ %rshamtconv = zext i16 %rshamt to i32
+ %conv = zext i16 %v to i32
+ %shl = shl i32 %conv, %lshamtconv
+ %shr = lshr i32 %conv, %rshamtconv
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i16
+ ret i16 %ret
+}
+
+define i16 @rotateleft_16_neg_mask_commute(i16 %v, i16 %shamt) {
+; CHECK-LABEL: @rotateleft_16_neg_mask_commute(
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.fshl.i16(i16 [[V:%.*]], i16 [[V]], i16 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %neg = sub i16 0, %shamt
+ %lshamt = and i16 %shamt, 15
+ %lshamtconv = zext i16 %lshamt to i32
+ %rshamt = and i16 %neg, 15
+ %rshamtconv = zext i16 %rshamt to i32
+ %conv = zext i16 %v to i32
+ %shl = shl i32 %conv, %lshamtconv
+ %shr = lshr i32 %conv, %rshamtconv
+ %or = or i32 %shl, %shr
+ %ret = trunc i32 %or to i16
+ ret i16 %ret
+}
+
+define i8 @rotateright_8_neg_mask(i8 %v, i8 %shamt) {
+; CHECK-LABEL: @rotateright_8_neg_mask(
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %neg = sub i8 0, %shamt
+ %rshamt = and i8 %shamt, 7
+ %rshamtconv = zext i8 %rshamt to i32
+ %lshamt = and i8 %neg, 7
+ %lshamtconv = zext i8 %lshamt to i32
+ %conv = zext i8 %v to i32
+ %shl = shl i32 %conv, %lshamtconv
+ %shr = lshr i32 %conv, %rshamtconv
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i8
+ ret i8 %ret
+}
+
+define i8 @rotateright_8_neg_mask_commute(i8 %v, i8 %shamt) {
+; CHECK-LABEL: @rotateright_8_neg_mask_commute(
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.fshr.i8(i8 [[V:%.*]], i8 [[V]], i8 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %neg = sub i8 0, %shamt
+ %rshamt = and i8 %shamt, 7
+ %rshamtconv = zext i8 %rshamt to i32
+ %lshamt = and i8 %neg, 7
+ %lshamtconv = zext i8 %lshamt to i32
+ %conv = zext i8 %v to i32
+ %shl = shl i32 %conv, %lshamtconv
+ %shr = lshr i32 %conv, %rshamtconv
+ %or = or i32 %shl, %shr
+ %ret = trunc i32 %or to i8
+ ret i8 %ret
+}
+
+; The shift amount may already be in the wide type,
+; so we need to truncate it going into the rotate pattern.
+
+define i16 @rotateright_16_neg_mask_wide_amount(i16 %v, i32 %shamt) {
+; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %neg = sub i32 0, %shamt
+ %rshamt = and i32 %shamt, 15
+ %lshamt = and i32 %neg, 15
+ %conv = zext i16 %v to i32
+ %shl = shl i32 %conv, %lshamt
+ %shr = lshr i32 %conv, %rshamt
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i16
+ ret i16 %ret
+}
+
+define i16 @rotateright_16_neg_mask_wide_amount_commute(i16 %v, i32 %shamt) {
+; CHECK-LABEL: @rotateright_16_neg_mask_wide_amount_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i16
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.fshr.i16(i16 [[V:%.*]], i16 [[V]], i16 [[TMP1]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %neg = sub i32 0, %shamt
+ %rshamt = and i32 %shamt, 15
+ %lshamt = and i32 %neg, 15
+ %conv = zext i16 %v to i32
+ %shl = shl i32 %conv, %lshamt
+ %shr = lshr i32 %conv, %rshamt
+ %or = or i32 %shl, %shr
+ %ret = trunc i32 %or to i16
+ ret i16 %ret
+}
+
+define i8 @rotateleft_8_neg_mask_wide_amount(i8 %v, i32 %shamt) {
+; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %neg = sub i32 0, %shamt
+ %lshamt = and i32 %shamt, 7
+ %rshamt = and i32 %neg, 7
+ %conv = zext i8 %v to i32
+ %shl = shl i32 %conv, %lshamt
+ %shr = lshr i32 %conv, %rshamt
+ %or = or i32 %shr, %shl
+ %ret = trunc i32 %or to i8
+ ret i8 %ret
+}
+
+define i8 @rotateleft_8_neg_mask_wide_amount_commute(i8 %v, i32 %shamt) {
+; CHECK-LABEL: @rotateleft_8_neg_mask_wide_amount_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT:%.*]] to i8
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.fshl.i8(i8 [[V:%.*]], i8 [[V]], i8 [[TMP1]])
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %neg = sub i32 0, %shamt
+ %lshamt = and i32 %shamt, 7
+ %rshamt = and i32 %neg, 7
+ %conv = zext i8 %v to i32
+ %shl = shl i32 %conv, %lshamt
+ %shr = lshr i32 %conv, %rshamt
+ %or = or i32 %shl, %shr
+ %ret = trunc i32 %or to i8
+ ret i8 %ret
+}
+
+; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
+
+define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
+; CHECK-LABEL: @rotateleft_9_neg_mask_wide_amount_commute(
+; CHECK-NEXT: [[NEG:%.*]] = sub i33 0, [[SHAMT:%.*]]
+; CHECK-NEXT: [[LSHAMT:%.*]] = and i33 [[SHAMT]], 8
+; CHECK-NEXT: [[RSHAMT:%.*]] = and i33 [[NEG]], 8
+; CHECK-NEXT: [[CONV:%.*]] = zext i9 [[V:%.*]] to i33
+; CHECK-NEXT: [[SHL:%.*]] = shl i33 [[CONV]], [[LSHAMT]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i33 [[CONV]], [[RSHAMT]]
+; CHECK-NEXT: [[OR:%.*]] = or i33 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[RET:%.*]] = trunc i33 [[OR]] to i9
+; CHECK-NEXT: ret i9 [[RET]]
+;
+ %neg = sub i33 0, %shamt
+ %lshamt = and i33 %shamt, 8
+ %rshamt = and i33 %neg, 8
+ %conv = zext i9 %v to i33
+ %shl = shl i33 %conv, %lshamt
+ %shr = lshr i33 %conv, %rshamt
+ %or = or i33 %shl, %shr
+ %ret = trunc i33 %or to i9
+ ret i9 %ret
+}
+
+; Convert select pattern to masked shift that ends in 'or'.
+
+define i32 @rotr_select(i32 %x, i32 %shamt) {
+; CHECK-LABEL: @rotr_select(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp eq i32 %shamt, 0
+ %sub = sub i32 32, %shamt
+ %shr = lshr i32 %x, %shamt
+ %shl = shl i32 %x, %sub
+ %or = or i32 %shr, %shl
+ %r = select i1 %cmp, i32 %x, i32 %or
+ ret i32 %r
+}
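; The select that guards against shamt == 0 becomes redundant once the funnel
; shift is formed: the rotate amount is taken modulo the bitwidth, and a rotate
; by 0 returns the input, which is exactly the select's true value, e.g.
;   call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 0) == %x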
+
+; Convert select pattern to masked shift that ends in 'or'.
+
+define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
+; CHECK-LABEL: @rotr_select_commute(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %cmp = icmp eq i8 %shamt, 0
+ %sub = sub i8 8, %shamt
+ %shr = lshr i8 %x, %shamt
+ %shl = shl i8 %x, %sub
+ %or = or i8 %shl, %shr
+ %r = select i1 %cmp, i8 %x, i8 %or
+ ret i8 %r
+}
+
+; Convert select pattern to masked shift that ends in 'or'.
+
+define i16 @rotl_select(i16 %x, i16 %shamt) {
+; CHECK-LABEL: @rotl_select(
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %cmp = icmp eq i16 %shamt, 0
+ %sub = sub i16 16, %shamt
+ %shr = lshr i16 %x, %sub
+ %shl = shl i16 %x, %shamt
+ %or = or i16 %shr, %shl
+ %r = select i1 %cmp, i16 %x, i16 %or
+ ret i16 %r
+}
+
+; Convert select pattern to masked shift that ends in 'or'.
+
+define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
+; CHECK-LABEL: @rotl_select_commute(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
+ %sub = sub <2 x i64> <i64 64, i64 64>, %shamt
+ %shr = lshr <2 x i64> %x, %sub
+ %shl = shl <2 x i64> %x, %shamt
+ %or = or <2 x i64> %shl, %shr
+ %r = select <2 x i1> %cmp, <2 x i64> %x, <2 x i64> %or
+ ret <2 x i64> %r
+}
+
+; Negative test - the transform is only valid with power-of-2 types.
+
+define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
+; CHECK-LABEL: @rotl_select_weird_type(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i24 [[SHAMT:%.*]], 0
+; CHECK-NEXT: [[SUB:%.*]] = sub i24 24, [[SHAMT]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
+; CHECK-NEXT: [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
+; CHECK-NEXT: ret i24 [[R]]
+;
+ %cmp = icmp eq i24 %shamt, 0
+ %sub = sub i24 24, %shamt
+ %shr = lshr i24 %x, %sub
+ %shl = shl i24 %x, %shamt
+ %or = or i24 %shl, %shr
+ %r = select i1 %cmp, i24 %x, i24 %or
+ ret i24 %r
+}
+
+; Test that the transform doesn't crash when there's an "or" with a ConstantExpr operand.
+
+@external_global = external global i8
+
+define i32 @rotl_constant_expr(i32 %shamt) {
+; CHECK-LABEL: @rotl_constant_expr(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 ptrtoint (i8* @external_global to i32), [[SHAMT:%.*]]
+; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], shl (i32 ptrtoint (i8* @external_global to i32), i32 11)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 ptrtoint (i8* @external_global to i32), %shamt
+ %r = or i32 %shr, shl (i32 ptrtoint (i8* @external_global to i32), i32 11)
+ ret i32 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/round.ll b/llvm/test/Transforms/InstCombine/round.ll
new file mode 100644
index 00000000000..ecc62dda36b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/round.ll
@@ -0,0 +1,90 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.round.f32(float) #0
+declare double @llvm.round.f64(double) #0
+
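; llvm.round rounds to the nearest integer with halfway cases rounded away
; from zero (like C round()), so for example:
;   call float @llvm.round.f32(float 1.5)  --> 2.0
;   call float @llvm.round.f32(float -1.5) --> -2.0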
+; CHECK-LABEL: @constant_fold_round_f32_01
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_round_f32_01() #0 {
+ %x = call float @llvm.round.f32(float 1.25) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_02
+; CHECK-NEXT: ret float -1.000000e+00
+define float @constant_fold_round_f32_02() #0 {
+ %x = call float @llvm.round.f32(float -1.25) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_03
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_round_f32_03() #0 {
+ %x = call float @llvm.round.f32(float 1.5) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_04
+; CHECK-NEXT: ret float -2.000000e+00
+define float @constant_fold_round_f32_04() #0 {
+ %x = call float @llvm.round.f32(float -1.5) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_05
+; CHECK-NEXT: ret float 3.000000e+00
+define float @constant_fold_round_f32_05() #0 {
+ %x = call float @llvm.round.f32(float 2.75) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f32_06
+; CHECK-NEXT: ret float -3.000000e+00
+define float @constant_fold_round_f32_06() #0 {
+ %x = call float @llvm.round.f32(float -2.75) #0
+ ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_01
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_round_f64_01() #0 {
+ %x = call double @llvm.round.f64(double 1.3) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_02
+; CHECK-NEXT: ret double -1.000000e+00
+define double @constant_fold_round_f64_02() #0 {
+ %x = call double @llvm.round.f64(double -1.3) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_03
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_round_f64_03() #0 {
+ %x = call double @llvm.round.f64(double 1.5) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_04
+; CHECK-NEXT: ret double -2.000000e+00
+define double @constant_fold_round_f64_04() #0 {
+ %x = call double @llvm.round.f64(double -1.5) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_05
+; CHECK-NEXT: ret double 3.000000e+00
+define double @constant_fold_round_f64_05() #0 {
+ %x = call double @llvm.round.f64(double 2.7) #0
+ ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_round_f64_06
+; CHECK-NEXT: ret double -3.000000e+00
+define double @constant_fold_round_f64_06() #0 {
+ %x = call double @llvm.round.f64(double -2.7) #0
+ ret double %x
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
new file mode 100644
index 00000000000..0acf6036624
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32>, <2 x i32>)
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+
+declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8)
+
+define { i32, i1 } @simple_fold(i32 %x) {
+; CHECK-LABEL: @simple_fold(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 20)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nsw i32 %x, 7
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 13)
+ ret { i32, i1 } %b
+}
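; The inner 'add nsw' cannot wrap, so its constant can be folded into the
; intrinsic's constant: 7 + 13 == 20.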
+
+define { i32, i1 } @fold_mixed_signs(i32 %x) {
+; CHECK-LABEL: @fold_mixed_signs(
+; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[B]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nsw i32 %x, 13
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 -7)
+ ret { i32, i1 } %b
+}
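; 13 + (-7) == 6; because %x + 13 is known not to overflow (nsw) and %x + 6
; lies between %x and %x + 13, the sum cannot overflow either, so the overflow
; flag is known to be false.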
+
+define { i8, i1 } @fold_on_constant_add_no_overflow(i8 %x) {
+; CHECK-LABEL: @fold_on_constant_add_no_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 127)
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %a = add nsw i8 %x, 100
+ %b = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 27)
+ ret { i8, i1 } %b
+}
+
+define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) {
+; CHECK-LABEL: @no_fold_on_constant_add_overflow(
+; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 100
+; CHECK-NEXT: [[B:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A]], i8 28)
+; CHECK-NEXT: ret { i8, i1 } [[B]]
+;
+ %a = add nsw i8 %x, 100
+ %b = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 28)
+ ret { i8, i1 } %b
+}
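; Here 100 + 28 == 128 is outside the signed i8 range [-128, 127], so the
; constants cannot be combined; in the previous test 100 + 27 == 127 still
; fits, which is why that one folds.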
+
+define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @fold_simple_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = add nsw <2 x i32> %x, <i32 12, i32 12>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
+; CHECK-LABEL: @no_fold_splat_undef_constant(
+; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 12, i32 undef>
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]]
+;
+ %a = add nsw <2 x i32> %x, <i32 12, i32 undef>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @no_fold_splat_not_constant(
+; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]]
+;
+ %a = add nsw <2 x i32> %x, %y
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { i32, i1 } @fold_nuwnsw(i32 %x) {
+; CHECK-LABEL: @fold_nuwnsw(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nuw nsw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_nuw(i32 %x) {
+; CHECK-LABEL: @no_fold_nuw(
+; CHECK-NEXT: [[A:%.*]] = add nuw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[B:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A]], i32 30)
+; CHECK-NEXT: ret { i32, i1 } [[B]]
+;
+ %a = add nuw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_wrapped_add(i32 %x) {
+; CHECK-LABEL: @no_fold_wrapped_add(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 12
+; CHECK-NEXT: [[B:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A]], i32 30)
+; CHECK-NEXT: ret { i32, i1 } [[B]]
+;
+ %a = add i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 30, i32 %a)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @fold_sub_simple(i32 %x) {
+; CHECK-LABEL: @fold_sub_simple(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = sub nsw i32 %x, -12
+ %b = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
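; 'sub nsw %x, -12' is the same as 'add nsw %x, 12', so the constants combine
; to 12 + 30 == 42, as in @fold_nuwnsw above.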
diff --git a/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll b/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll
new file mode 100644
index 00000000000..eaf09569feb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll
@@ -0,0 +1,49 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+
+declare dso_local i32 @bar(i8*)
+
+; Function Attrs: nounwind
+define internal i32 @foo() #0 !dbg !1 {
+; CHECK: %[[VLA:.*]] = alloca [2 x i32]
+; CHECK: call void @llvm.dbg.declare(metadata [2 x i32]* %[[VLA]], {{.*}}, metadata !DIExpression())
+
+entry:
+ %vla = alloca i32, i64 2, align 4, !dbg !16
+ call void @llvm.dbg.declare(metadata i32* %vla, metadata !19, metadata !DIExpression()), !dbg !20
+ %0 = bitcast i32* %vla to i8*, !dbg !21
+ %call = call i32 @bar(i8* %0), !dbg !22
+ unreachable
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DISubprogram(name: "a", scope: !2, file: !2, line: 232, type: !3, isLocal: true, isDefinition: true, scopeLine: 234, flags: DIFlagPrototyped, isOptimized: true, unit: !5, retainedNodes: !6)
+!2 = !DIFile(filename: "b", directory: "c")
+!3 = !DISubroutineType(types: !4)
+!4 = !{}
+!5 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4)
+!6 = !{!7, !11}
+!7 = !DILocalVariable(name: "__vla_expr", scope: !8, type: !10, flags: DIFlagArtificial)
+!8 = distinct !DILexicalBlock(scope: !9, file: !2, line: 238, column: 39)
+!9 = distinct !DILexicalBlock(scope: !1, file: !2, line: 238, column: 6)
+!10 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!11 = !DILocalVariable(name: "ptr32", scope: !8, file: !2, line: 240, type: !12)
+!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, elements: !14)
+!13 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!14 = !{!15}
+!15 = !DISubrange(count: !7)
+!16 = !DILocation(line: 240, column: 3, scope: !17)
+!17 = distinct !DILexicalBlock(scope: !18, file: !2, line: 238, column: 39)
+!18 = distinct !DILexicalBlock(scope: !1, file: !2, line: 238, column: 6)
+!19 = !DILocalVariable(name: "ptr32", scope: !17, file: !2, line: 240, type: !12)
+!20 = !DILocation(line: 240, column: 12, scope: !17)
+!21 = !DILocation(line: 241, column: 65, scope: !17)
+!22 = !DILocation(line: 241, column: 11, scope: !17)
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
new file mode 100644
index 00000000000..8b50eb64262
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -0,0 +1,1462 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; Saturating addition.
+;
+
+declare i8 @llvm.uadd.sat.i8(i8, i8)
+declare i8 @llvm.sadd.sat.i8(i8, i8)
+declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
+declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
+
+; Constant uadd argument is canonicalized to the right.
+define i8 @test_scalar_uadd_canonical(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_canonical(
+; CHECK-NEXT: [[X:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A:%.*]], i8 10)
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %x = call i8 @llvm.uadd.sat.i8(i8 10, i8 %a)
+ ret i8 %x
+}
+
+define <2 x i8> @test_vector_uadd_canonical(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_canonical(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 20>)
+; CHECK-NEXT: ret <2 x i8> [[X]]
+;
+ %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 10, i8 20>, <2 x i8> %a)
+ ret <2 x i8> %x
+}
+
+; Constant sadd argument is canonicalized to the right.
+define i8 @test_scalar_sadd_canonical(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_canonical(
+; CHECK-NEXT: [[X:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -10)
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %x = call i8 @llvm.sadd.sat.i8(i8 -10, i8 %a)
+ ret i8 %x
+}
+
+define <2 x i8> @test_vector_sadd_canonical(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_canonical(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 -20>)
+; CHECK-NEXT: ret <2 x i8> [[X]]
+;
+ %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 10, i8 -20>, <2 x i8> %a)
+ ret <2 x i8> %x
+}
+
+; Can combine uadds with constant operands.
+define i8 @test_scalar_uadd_combine(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_combine(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %x1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 10)
+ %x2 = call i8 @llvm.uadd.sat.i8(i8 %x1, i8 20)
+ ret i8 %x2
+}
+
+define <2 x i8> @test_vector_uadd_combine(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_combine(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
+ %x2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x1, <2 x i8> <i8 20, i8 20>)
+ ret <2 x i8> %x2
+}
+
+; This could simplify, but currently doesn't.
+define <2 x i8> @test_vector_uadd_combine_non_splat(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_combine_non_splat(
+; CHECK-NEXT: [[X1:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 20>)
+; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X1]], <2 x i8> <i8 30, i8 40>)
+; CHECK-NEXT: ret <2 x i8> [[X2]]
+;
+ %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 20>)
+ %x2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x1, <2 x i8> <i8 30, i8 40>)
+ ret <2 x i8> %x2
+}
+
+; Can combine uadds even if they overflow.
+define i8 @test_scalar_uadd_overflow(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_overflow(
+; CHECK-NEXT: ret i8 -1
+;
+ %y1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 100)
+ %y2 = call i8 @llvm.uadd.sat.i8(i8 %y1, i8 200)
+ ret i8 %y2
+}
+
+define <2 x i8> @test_vector_uadd_overflow(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_overflow(
+; CHECK-NEXT: ret <2 x i8> <i8 -1, i8 -1>
+;
+ %y1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 100, i8 100>)
+ %y2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %y1, <2 x i8> <i8 200, i8 200>)
+ ret <2 x i8> %y2
+}
+
+; Can combine sadds if sign matches.
+define i8 @test_scalar_sadd_both_positive(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_both_positive(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %z1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10)
+ %z2 = call i8 @llvm.sadd.sat.i8(i8 %z1, i8 20)
+ ret i8 %z2
+}
+
+define <2 x i8> @test_vector_sadd_both_positive(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_both_positive(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %z1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
+ %z2 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %z1, <2 x i8> <i8 20, i8 20>)
+ ret <2 x i8> %z2
+}
+
+define i8 @test_scalar_sadd_both_negative(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_both_negative(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %u1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 -10)
+ %u2 = call i8 @llvm.sadd.sat.i8(i8 %u1, i8 -20)
+ ret i8 %u2
+}
+
+define <2 x i8> @test_vector_sadd_both_negative(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_both_negative(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %u1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 -10, i8 -10>)
+ %u2 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %u1, <2 x i8> <i8 -20, i8 -20>)
+ ret <2 x i8> %u2
+}
+
+; Can't combine sadds if constants have different sign.
+define i8 @test_scalar_sadd_different_sign(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_different_sign(
+; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 10)
+; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[V1]], i8 -20)
+; CHECK-NEXT: ret i8 [[V2]]
+;
+ %v1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10)
+ %v2 = call i8 @llvm.sadd.sat.i8(i8 %v1, i8 -20)
+ ret i8 %v2
+}
+
+; Can't combine sadds if they overflow.
+define i8 @test_scalar_sadd_overflow(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_overflow(
+; CHECK-NEXT: [[W1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 100)
+; CHECK-NEXT: [[W2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[W1]], i8 100)
+; CHECK-NEXT: ret i8 [[W2]]
+;
+ %w1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 100)
+ %w2 = call i8 @llvm.sadd.sat.i8(i8 %w1, i8 100)
+ ret i8 %w2
+}
+
+; neg uadd neg always overflows.
+define i8 @test_scalar_uadd_neg_neg(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_neg_neg(
+; CHECK-NEXT: ret i8 -1
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.uadd.sat.i8(i8 %a_neg, i8 -10)
+ ret i8 %r
+}
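; Worked example: %a_neg has the sign bit set, so as an unsigned value it is
; at least 128; the constant -10 is 246 unsigned, and 128 + 246 == 374 > 255,
; so the saturating add always produces 255 (i8 -1).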
+
+define <2 x i8> @test_vector_uadd_neg_neg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_neg_neg(
+; CHECK-NEXT: ret <2 x i8> <i8 -1, i8 -1>
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 -10, i8 -20>)
+ ret <2 x i8> %r
+}
+
+; nneg uadd nneg never overflows.
+define i8 @test_scalar_uadd_nneg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and i8 [[A:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = add nuw i8 [[A_NNEG]], 10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_nneg = and i8 %a, 127
+ %r = call i8 @llvm.uadd.sat.i8(i8 %a_nneg, i8 10)
+ ret i8 %r
+}
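; Worked example: %a_nneg is at most 127, and 127 + 10 == 137 <= 255, so the
; saturating add can never saturate and becomes a plain 'add nuw'.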
+
+define <2 x i8> @test_vector_uadd_nneg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: [[R:%.*]] = add nuw <2 x i8> [[A_NNEG]], <i8 10, i8 20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_nneg = and <2 x i8> %a, <i8 127, i8 127>
+ %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+; neg uadd nneg might overflow.
+define i8 @test_scalar_uadd_neg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A_NEG]], i8 10)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.uadd.sat.i8(i8 %a_neg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_uadd_neg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 10, i8 20>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_uadd_never_overflows(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_never_overflows(
+; CHECK-NEXT: [[A_MASKED:%.*]] = and i8 [[A:%.*]], -127
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i8 [[A_MASKED]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_masked = and i8 %a, 129
+ %r = call i8 @llvm.uadd.sat.i8(i8 %a_masked, i8 1)
+ ret i8 %r
+}
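; The mask 129 (0b10000001, printed as -127 in the check line) limits
; %a_masked to {0, 1, 128, 129}; adding 1 gives at most 130, so neither
; unsigned nor signed overflow is possible.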
+
+define <2 x i8> @test_vector_uadd_never_overflows(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_never_overflows(
+; CHECK-NEXT: [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_masked = and <2 x i8> %a, <i8 129, i8 129>
+ %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 1, i8 1>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_uadd_always_overflows(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_always_overflows(
+; CHECK-NEXT: ret i8 -1
+;
+ %a_masked = or i8 %a, 192
+ %r = call i8 @llvm.uadd.sat.i8(i8 %a_masked, i8 64)
+ ret i8 %r
+}
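; %a_masked is at least 192 (0b11000000), and 192 + 64 == 256 > 255, so the
; result is always the saturated value 255 (i8 -1).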
+
+define <2 x i8> @test_vector_uadd_always_overflows(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_uadd_always_overflows(
+; CHECK-NEXT: ret <2 x i8> <i8 -1, i8 -1>
+;
+ %a_masked = or <2 x i8> %a, <i8 192, i8 192>
+ %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 64, i8 64>)
+ ret <2 x i8> %r
+}
+
+; neg sadd nneg never overflows.
+define i8 @test_scalar_sadd_neg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_NEG]], 10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_sadd_neg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], <i8 10, i8 20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+; nneg sadd neg never overflows.
+define i8 @test_scalar_sadd_nneg_neg(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_nneg_neg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and i8 [[A:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_NNEG]], -10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_nneg = and i8 %a, 127
+ %r = call i8 @llvm.sadd.sat.i8(i8 %a_nneg, i8 -10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_sadd_nneg_neg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_nneg_neg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], <i8 -10, i8 -20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_nneg = and <2 x i8> %a, <i8 127, i8 127>
+ %r = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> <i8 -10, i8 -20>)
+ ret <2 x i8> %r
+}
+
+; neg sadd neg might overflow.
+define i8 @test_scalar_sadd_neg_neg(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_neg_neg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NEG]], i8 -10)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 -10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_neg_neg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 -10, i8 -20>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 -10, i8 -20>)
+ ret <2 x i8> %r
+}
+
+; While this is a no-overflow condition, the nuw flag gets lost due to
+; canonicalization, so we can no longer determine that the add does not overflow.
+define i8 @test_scalar_uadd_sub_nuw_lost_no_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_sub_nuw_lost_no_ov(
+; CHECK-NEXT: [[B:%.*]] = add i8 [[A:%.*]], -10
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[B]], i8 9)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = sub nuw i8 %a, 10
+ %r = call i8 @llvm.uadd.sat.i8(i8 %b, i8 9)
+ ret i8 %r
+}
+
+define i8 @test_scalar_uadd_urem_no_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_urem_no_ov(
+; CHECK-NEXT: [[B:%.*]] = urem i8 [[A:%.*]], 100
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i8 [[B]], -100
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = urem i8 %a, 100
+ %r = call i8 @llvm.uadd.sat.i8(i8 %b, i8 156)
+ ret i8 %r
+}
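; %b is in [0, 99] and the constant 156 (printed as -100) gives at most
; 99 + 156 == 255, so the result never exceeds 255 and a plain add gives the
; same value; the next test uses 157, where 99 + 157 == 256 can wrap, so the
; intrinsic must stay.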
+
+define i8 @test_scalar_uadd_urem_may_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_uadd_urem_may_ov(
+; CHECK-NEXT: [[B:%.*]] = urem i8 [[A:%.*]], 100
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[B]], i8 -99)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = urem i8 %a, 100
+ %r = call i8 @llvm.uadd.sat.i8(i8 %b, i8 157)
+ ret i8 %r
+}
+
+; We have a constant range for the LHS, but only known bits for the RHS
+define i8 @test_scalar_uadd_udiv_known_bits(i8 %a, i8 %b) {
+; CHECK-LABEL: @test_scalar_uadd_udiv_known_bits(
+; CHECK-NEXT: [[AA:%.*]] = udiv i8 -66, [[A:%.*]]
+; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 63
+; CHECK-NEXT: [[R:%.*]] = add nuw i8 [[AA]], [[BB]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %aa = udiv i8 190, %a
+ %bb = and i8 %b, 63
+ %r = call i8 @llvm.uadd.sat.i8(i8 %aa, i8 %bb)
+ ret i8 %r
+}
+
+define i8 @test_scalar_sadd_srem_no_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_srem_no_ov(
+; CHECK-NEXT: [[B:%.*]] = srem i8 [[A:%.*]], 100
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[B]], 28
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = srem i8 %a, 100
+ %r = call i8 @llvm.sadd.sat.i8(i8 %b, i8 28)
+ ret i8 %r
+}
+
+define i8 @test_scalar_sadd_srem_may_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_srem_may_ov(
+; CHECK-NEXT: [[B:%.*]] = srem i8 [[A:%.*]], 100
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[B]], i8 29)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = srem i8 %a, 100
+ %r = call i8 @llvm.sadd.sat.i8(i8 %b, i8 29)
+ ret i8 %r
+}
+
+define i8 @test_scalar_sadd_srem_and_no_ov(i8 %a, i8 %b) {
+; CHECK-LABEL: @test_scalar_sadd_srem_and_no_ov(
+; CHECK-NEXT: [[AA:%.*]] = srem i8 [[A:%.*]], 100
+; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 15
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[AA]], [[BB]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %aa = srem i8 %a, 100
+ %bb = and i8 %b, 15
+ %r = call i8 @llvm.sadd.sat.i8(i8 %aa, i8 %bb)
+ ret i8 %r
+}
+
+;
+; Saturating subtraction.
+;
+
+declare i8 @llvm.usub.sat.i8(i8, i8)
+declare i8 @llvm.ssub.sat.i8(i8, i8)
+declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
+declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
+
+; Cannot canonicalize usub to uadd.
+define i8 @test_scalar_usub_canonical(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_canonical(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 10)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %r = call i8 @llvm.usub.sat.i8(i8 %a, i8 10)
+ ret i8 %r
+}
+
+; Canonicalize ssub to sadd.
+define i8 @test_scalar_ssub_canonical(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_canonical(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -10)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %r = call i8 @llvm.ssub.sat.i8(i8 %a, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_ssub_canonical(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_canonical(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -10, i8 -10>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @test_vector_ssub_canonical_min_non_splat(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_canonical_min_non_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -10, i8 10>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 -10>)
+ ret <2 x i8> %r
+}
+
+; Cannot canonicalize signed min.
+define i8 @test_scalar_ssub_canonical_min(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_canonical_min(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A:%.*]], i8 -128)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %r = call i8 @llvm.ssub.sat.i8(i8 %a, i8 -128)
+ ret i8 %r
+}
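; -(-128) is not representable in i8, so there is no constant that would let
; this ssub be rewritten as an sadd.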
+
+define <2 x i8> @test_vector_ssub_canonical_min(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_canonical_min(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -128, i8 -10>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 -128, i8 -10>)
+ ret <2 x i8> %r
+}
+
+; Can combine usubs with constant operands.
+define i8 @test_scalar_usub_combine(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_combine(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %x1 = call i8 @llvm.usub.sat.i8(i8 %a, i8 10)
+ %x2 = call i8 @llvm.usub.sat.i8(i8 %x1, i8 20)
+ ret i8 %x2
+}
+
+define <2 x i8> @test_vector_usub_combine(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_combine(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
+ %x2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x1, <2 x i8> <i8 20, i8 20>)
+ ret <2 x i8> %x2
+}
+
+; This could simplify, but currently doesn't.
+define <2 x i8> @test_vector_usub_combine_non_splat(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_combine_non_splat(
+; CHECK-NEXT: [[X1:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 20>)
+; CHECK-NEXT: [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[X1]], <2 x i8> <i8 30, i8 40>)
+; CHECK-NEXT: ret <2 x i8> [[X2]]
+;
+ %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 20>)
+ %x2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x1, <2 x i8> <i8 30, i8 40>)
+ ret <2 x i8> %x2
+}
+
+; Can combine usubs even if they overflow.
+define i8 @test_scalar_usub_overflow(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_overflow(
+; CHECK-NEXT: ret i8 0
+;
+ %y1 = call i8 @llvm.usub.sat.i8(i8 %a, i8 100)
+ %y2 = call i8 @llvm.usub.sat.i8(i8 %y1, i8 200)
+ ret i8 %y2
+}
+
+define <2 x i8> @test_vector_usub_overflow(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_overflow(
+; CHECK-NEXT: ret <2 x i8> zeroinitializer
+;
+ %y1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 100, i8 100>)
+ %y2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %y1, <2 x i8> <i8 200, i8 200>)
+ ret <2 x i8> %y2
+}
+
+; Can combine ssubs if sign matches.
+define i8 @test_scalar_ssub_both_positive(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_both_positive(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %z1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 10)
+ %z2 = call i8 @llvm.ssub.sat.i8(i8 %z1, i8 20)
+ ret i8 %z2
+}
+
+define <2 x i8> @test_vector_ssub_both_positive(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_both_positive(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %z1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
+ %z2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %z1, <2 x i8> <i8 20, i8 20>)
+ ret <2 x i8> %z2
+}
+
+define i8 @test_scalar_ssub_both_negative(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_both_negative(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %u1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 -10)
+ %u2 = call i8 @llvm.ssub.sat.i8(i8 %u1, i8 -20)
+ ret i8 %u2
+}
+
+define <2 x i8> @test_vector_ssub_both_negative(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_both_negative(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %u1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 -10, i8 -10>)
+ %u2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %u1, <2 x i8> <i8 -20, i8 -20>)
+ ret <2 x i8> %u2
+}
+
+; Can't combine ssubs if constants have different sign.
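+; e.g., for %a = -128: ssub.sat(-128, 10) saturates to -128 and -128 - (-20) = -108,
+; but folding to a single sadd.sat(-128, 10) would give -118.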
+define i8 @test_scalar_ssub_different_sign(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_different_sign(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -10)
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[TMP1]], i8 20)
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %v1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 10)
+ %v2 = call i8 @llvm.ssub.sat.i8(i8 %v1, i8 -20)
+ ret i8 %v2
+}
+
+; Can combine sadd and ssub with appropriate signs.
+define i8 @test_scalar_sadd_ssub(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_ssub(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %v1 = call i8 @llvm.sadd.sat.i8(i8 10, i8 %a)
+ %v2 = call i8 @llvm.ssub.sat.i8(i8 %v1, i8 -20)
+ ret i8 %v2
+}
+
+define <2 x i8> @test_vector_sadd_ssub(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_sadd_ssub(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %v1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 -10, i8 -10>, <2 x i8> %a)
+ %v2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %v1, <2 x i8> <i8 20, i8 20>)
+ ret <2 x i8> %v2
+}
+
+; Can't combine ssubs if they overflow.
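+; e.g., the combined offset -200 does not fit in i8; clamping it to -128 would turn
+; ssub.sat(ssub.sat(100, 100), 100) = -100 into sadd.sat(100, -128) = -28.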
+define i8 @test_scalar_ssub_overflow(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A:%.*]], i8 -100)
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[TMP1]], i8 -100)
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %w1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 100)
+ %w2 = call i8 @llvm.ssub.sat.i8(i8 %w1, i8 100)
+ ret i8 %w2
+}
+
+; nneg usub neg always overflows.
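+; The negative RHS is at least 128 when read as unsigned, while %a_nneg is at most
+; 127, so the subtraction always underflows and saturates to 0.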
+define i8 @test_scalar_usub_nneg_neg(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_nneg_neg(
+; CHECK-NEXT: ret i8 0
+;
+ %a_nneg = and i8 %a, 127
+ %r = call i8 @llvm.usub.sat.i8(i8 %a_nneg, i8 -10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_nneg_neg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_nneg_neg(
+; CHECK-NEXT: ret <2 x i8> zeroinitializer
+;
+ %a_nneg = and <2 x i8> %a, <i8 127, i8 127>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> <i8 -10, i8 -20>)
+ ret <2 x i8> %r
+}
+
+; neg usub nneg never overflows.
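+; %a_neg has its sign bit set, so it is at least 128 as unsigned, and subtracting a
+; non-negative constant of at most 127 can never underflow; the usub.sat becomes a
+; plain add.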
+define i8 @test_scalar_usub_neg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[R:%.*]] = add i8 [[A_NEG]], -10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.usub.sat.i8(i8 %a_neg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_neg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A_NEG]], <i8 -10, i8 -20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+; nneg usub nneg may overflow.
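+; %a_nneg can be anywhere in [0, 127], so the subtraction underflows exactly when
+; %a_nneg is smaller than the constant; the call cannot be simplified.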
+define i8 @test_scalar_usub_nneg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and i8 [[A:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A_NNEG]], i8 10)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_nneg = and i8 %a, 127
+ %r = call i8 @llvm.usub.sat.i8(i8 %a_nneg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_nneg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> <i8 10, i8 20>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_nneg = and <2 x i8> %a, <i8 127, i8 127>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_usub_never_overflows(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_never_overflows(
+; CHECK-NEXT: [[A_MASKED:%.*]] = or i8 [[A:%.*]], 64
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_MASKED]], -10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_masked = or i8 %a, 64
+ %r = call i8 @llvm.usub.sat.i8(i8 %a_masked, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_never_overflows(
+; CHECK-NEXT: [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], <i8 64, i8 64>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], <i8 -10, i8 -10>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_masked = or <2 x i8> %a, <i8 64, i8 64>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 10, i8 10>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_usub_always_overflows(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_always_overflows(
+; CHECK-NEXT: ret i8 0
+;
+ %a_masked = and i8 %a, 64
+ %r = call i8 @llvm.usub.sat.i8(i8 %a_masked, i8 100)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_always_overflows(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_always_overflows(
+; CHECK-NEXT: ret <2 x i8> zeroinitializer
+;
+ %a_masked = and <2 x i8> %a, <i8 64, i8 64>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 100, i8 100>)
+ ret <2 x i8> %r
+}
+
+; neg ssub neg never overflows.
+define i8 @test_scalar_ssub_neg_neg(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_neg_neg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_NEG]], 10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.ssub.sat.i8(i8 %a_neg, i8 -10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_ssub_neg_neg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_neg_neg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], <i8 10, i8 20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 -10, i8 -20>)
+ ret <2 x i8> %r
+}
+
+; nneg ssub nneg never overflows.
+define i8 @test_scalar_ssub_nneg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and i8 [[A:%.*]], 127
+; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A_NNEG]], -10
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a_nneg = and i8 %a, 127
+ %r = call i8 @llvm.ssub.sat.i8(i8 %a_nneg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_ssub_nneg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_nneg_nneg(
+; CHECK-NEXT: [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], <i8 -10, i8 -20>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %a_nneg = and <2 x i8> %a, <i8 127, i8 127>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+; neg ssub nneg may overflow.
+define i8 @test_scalar_ssub_neg_nneg(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NEG]], i8 -10)
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %a_neg = or i8 %a, -128
+ %r = call i8 @llvm.ssub.sat.i8(i8 %a_neg, i8 10)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_ssub_neg_nneg(
+; CHECK-NEXT: [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 -10, i8 -20>)
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 10, i8 20>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_usub_add_nuw_no_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_add_nuw_no_ov(
+; CHECK-NEXT: [[R:%.*]] = add i8 [[A:%.*]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = add nuw i8 %a, 10
+ %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 9)
+ ret i8 %r
+}
+
+define i8 @test_scalar_usub_add_nuw_eq(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_add_nuw_eq(
+; CHECK-NEXT: ret i8 [[A:%.*]]
+;
+ %b = add nuw i8 %a, 10
+ %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 10)
+ ret i8 %r
+}
+
+define i8 @test_scalar_usub_add_nuw_may_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_add_nuw_may_ov(
+; CHECK-NEXT: [[B:%.*]] = add nuw i8 [[A:%.*]], 10
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 11)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = add nuw i8 %a, 10
+ %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 11)
+ ret i8 %r
+}
+
+define i8 @test_scalar_usub_urem_must_ov(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_urem_must_ov(
+; CHECK-NEXT: ret i8 0
+;
+ %b = urem i8 %a, 10
+ %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 10)
+ ret i8 %r
+}
+
+; Like the previous case, the result here is always zero. However, the subtraction
+; does not always overflow (%b is in [0, 9], so %b == 9 yields zero without
+; saturating), so the overflow-based fold does not catch it.
+define i8 @test_scalar_usub_urem_must_zero(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_urem_must_zero(
+; CHECK-NEXT: [[B:%.*]] = urem i8 [[A:%.*]], 10
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 9)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = urem i8 %a, 10
+ %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 9)
+ ret i8 %r
+}
+
+; We have a constant range for the LHS, but only known bits for the RHS.
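+; %aa is at least 10 (add nuw) and %bb is at most 7 (masked), so the subtraction
+; can never underflow and the usub.sat becomes a plain sub nuw.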
+define i8 @test_scalar_usub_add_nuw_known_bits(i8 %a, i8 %b) {
+; CHECK-LABEL: @test_scalar_usub_add_nuw_known_bits(
+; CHECK-NEXT: [[AA:%.*]] = add nuw i8 [[A:%.*]], 10
+; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = sub nuw i8 [[AA]], [[BB]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %aa = add nuw i8 %a, 10
+ %bb = and i8 %b, 7
+ %r = call i8 @llvm.usub.sat.i8(i8 %aa, i8 %bb)
+ ret i8 %r
+}
+
+define i8 @test_scalar_usub_add_nuw_inferred(i8 %a) {
+; CHECK-LABEL: @test_scalar_usub_add_nuw_inferred(
+; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 10)
+; CHECK-NEXT: [[R:%.*]] = add nuw i8 [[B]], 9
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = call i8 @llvm.usub.sat.i8(i8 %a, i8 10)
+ %r = add i8 %b, 9
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_usub_add_nuw_no_ov(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %b = add nuw <2 x i8> %a, <i8 10, i8 10>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %b, <2 x i8> <i8 9, i8 9>)
+ ret <2 x i8> %r
+}
+
+; Can be optimized if the usub.sat RHS constant range handles non-splat vectors.
+define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat1(
+; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 10>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> <i8 10, i8 9>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %b = add nuw <2 x i8> %a, <i8 10, i8 10>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %b, <2 x i8> <i8 10, i8 9>)
+ ret <2 x i8> %r
+}
+
+; Can be optimized if the add nuw RHS constant range handles non-splat vectors.
+define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2(
+; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> <i8 9, i8 9>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %b = add nuw <2 x i8> %a, <i8 10, i8 9>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %b, <2 x i8> <i8 9, i8 9>)
+ ret <2 x i8> %r
+}
+
+; Can be optimized if constant range is tracked per-element.
+define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat3(<2 x i8> %a) {
+; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat3(
+; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> <i8 10, i8 9>)
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %b = add nuw <2 x i8> %a, <i8 10, i8 9>
+ %r = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %b, <2 x i8> <i8 10, i8 9>)
+ ret <2 x i8> %r
+}
+
+define i8 @test_scalar_ssub_add_nsw_no_ov(i8 %a, i8 %b) {
+; CHECK-LABEL: @test_scalar_ssub_add_nsw_no_ov(
+; CHECK-NEXT: [[AA:%.*]] = add nsw i8 [[A:%.*]], 7
+; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = sub nsw i8 [[AA]], [[BB]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %aa = add nsw i8 %a, 7
+ %bb = and i8 %b, 7
+ %r = call i8 @llvm.ssub.sat.i8(i8 %aa, i8 %bb)
+ ret i8 %r
+}
+
+define i8 @test_scalar_ssub_add_nsw_may_ov(i8 %a, i8 %b) {
+; CHECK-LABEL: @test_scalar_ssub_add_nsw_may_ov(
+; CHECK-NEXT: [[AA:%.*]] = add nsw i8 [[A:%.*]], 6
+; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[AA]], i8 [[BB]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %aa = add nsw i8 %a, 6
+ %bb = and i8 %b, 7
+ %r = call i8 @llvm.ssub.sat.i8(i8 %aa, i8 %bb)
+ ret i8 %r
+}
+
+define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_splat(
+; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 7>
+; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 7>
+; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %aa = add nsw <2 x i8> %a, <i8 7, i8 7>
+ %bb = and <2 x i8> %b, <i8 7, i8 7>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %aa, <2 x i8> %bb)
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat1(
+; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 7>
+; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 6>
+; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %aa = add nsw <2 x i8> %a, <i8 7, i8 7>
+ %bb = and <2 x i8> %b, <i8 7, i8 6>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %aa, <2 x i8> %bb)
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2(
+; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8>
+; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 7>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]])
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %aa = add nsw <2 x i8> %a, <i8 7, i8 8>
+ %bb = and <2 x i8> %b, <i8 7, i8 7>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %aa, <2 x i8> %bb)
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat3(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat3(
+; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 6>
+; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 6>
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]])
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %aa = add nsw <2 x i8> %a, <i8 7, i8 6>
+ %bb = and <2 x i8> %b, <i8 7, i8 6>
+ %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %aa, <2 x i8> %bb)
+ ret <2 x i8> %r
+}
+
+; Raw IR tests
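+; These match the open-coded form of an unsigned saturating add: x + y wraps
+; exactly when y u> ~x, so the select returns -1 (the saturated value) in that
+; case and x + y otherwise.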
+
+define i32 @uadd_sat(i32 %x, i32 %y) {
+; CHECK-LABEL: @uadd_sat(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %notx = xor i32 %x, -1
+ %a = add i32 %y, %x
+ %c = icmp ult i32 %notx, %y
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
+; CHECK-LABEL: @uadd_sat_commute_add(
+; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %x, %y
+ %c = icmp ult i32 %notx, %y
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define i32 @uadd_sat_ugt(i32 %x, i32 %yp) {
+; CHECK-LABEL: @uadd_sat_ugt(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %y, %x
+ %c = icmp ugt i32 %y, %notx
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
+; CHECK-LABEL: @uadd_sat_ugt_commute_add(
+; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
+; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> <i32 42, i32 43>, [[XP:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
+ %x = srem <2 x i32> <i32 42, i32 43>, %xp ; thwart complexity-based-canonicalization
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %a = add <2 x i32> %x, %y
+ %c = icmp ugt <2 x i32> %y, %notx
+ %r = select <2 x i1> %c, <2 x i32> <i32 -1, i32 -1>, <2 x i32> %a
+ ret <2 x i32> %r
+}
+
+define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
+; CHECK-LABEL: @uadd_sat_commute_select(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %y, %x
+ %c = icmp ult i32 %y, %notx
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
+; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
+; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
+ %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %x, %y
+ %c = icmp ult i32 %y, %notx
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @uadd_sat_commute_select_ugt(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %a = add <2 x i32> %y, %x
+ %c = icmp ugt <2 x i32> %notx, %y
+ %r = select <2 x i1> %c, <2 x i32> %a, <2 x i32> <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
+; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
+; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %x, %y
+ %c = icmp ugt i32 %notx, %y
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+; Negative test - make sure we have a -1 in the select.
+
+define i32 @not_uadd_sat(i32 %x, i32 %y) {
+; CHECK-LABEL: @not_uadd_sat(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -2
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add i32 %x, -2
+ %c = icmp ugt i32 %x, 1
+ %r = select i1 %c, i32 %a, i32 %y
+ ret i32 %r
+}
+
+; Negative test - make sure the predicate is 'ult'.
+
+define i32 @not_uadd_sat2(i32 %x, i32 %y) {
+; CHECK-LABEL: @not_uadd_sat2(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -2
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add i32 %x, -2
+ %c = icmp ugt i32 %x, 1
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+; The add may include a 'not' op rather than the cmp.
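+; Here the pattern is uadd.sat(~x, y): ~x + y wraps exactly when y u> ~(~x) = x,
+; which is the 'icmp ult x, y' condition below.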
+
+define i32 @uadd_sat_not(i32 %x, i32 %y) {
+; CHECK-LABEL: @uadd_sat_not(
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %notx = xor i32 %x, -1
+ %a = add i32 %notx, %y
+ %c = icmp ult i32 %x, %y
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define i32 @uadd_sat_not_commute_add(i32 %xp, i32 %yp) {
+; CHECK-LABEL: @uadd_sat_not_commute_add(
+; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = urem i32 42, [[YP:%.*]]
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
+ %y = urem i32 42, %yp ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %y, %notx
+ %c = icmp ult i32 %x, %y
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: @uadd_sat_not_ugt(
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %notx = xor i32 %x, -1
+ %a = add i32 %notx, %y
+ %c = icmp ugt i32 %y, %x
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
+; CHECK-LABEL: @uadd_sat_not_ugt_commute_add(
+; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %a = add <2 x i32> %y, %notx
+ %c = icmp ugt <2 x i32> %y, %x
+ %r = select <2 x i1> %c, <2 x i32> <i32 -1, i32 -1>, <2 x i32> %a
+ ret <2 x i32> %r
+}
+
+define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
+; CHECK-LABEL: @uadd_sat_not_commute_select(
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %notx = xor i32 %x, -1
+ %a = add i32 %notx, %y
+ %c = icmp ult i32 %y, %x
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define i32 @uadd_sat_not_commute_select_commute_add(i32 %x, i32 %yp) {
+; CHECK-LABEL: @uadd_sat_not_commute_select_commute_add(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 42, [[YP:%.*]]
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization
+ %notx = xor i32 %x, -1
+ %a = add i32 %y, %notx
+ %c = icmp ult i32 %y, %x
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp) {
+; CHECK-LABEL: @uadd_sat_not_commute_select_ugt(
+; CHECK-NEXT: [[X:%.*]] = urem <2 x i32> <i32 42, i32 -42>, [[XP:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> <i32 12, i32 412>, [[YP:%.*]]
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %x = urem <2 x i32> <i32 42, i32 -42>, %xp ; thwart complexity-based-canonicalization
+ %y = srem <2 x i32> <i32 12, i32 412>, %yp ; thwart complexity-based-canonicalization
+ %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %a = add <2 x i32> %y, %notx
+ %c = icmp ugt <2 x i32> %x, %y
+ %r = select <2 x i1> %c, <2 x i32> %a, <2 x i32> <i32 -1, i32 -1>
+ ret <2 x i32> %r
+}
+
+define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add(
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %notx = xor i32 %x, -1
+ %a = add i32 %notx, %y
+ %c = icmp ugt i32 %x, %y
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define i32 @uadd_sat_constant(i32 %x) {
+; CHECK-LABEL: @uadd_sat_constant(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 42
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], -43
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = add i32 %x, 42
+ %c = icmp ugt i32 %x, -43
+ %r = select i1 %c, i32 -1, i32 %a
+ ret i32 %r
+}
+
+define i32 @uadd_sat_constant_commute(i32 %x) {
+; CHECK-LABEL: @uadd_sat_constant_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %a = add i32 %x, 42
+ %c = icmp ult i32 %x, -43
+ %r = select i1 %c, i32 %a, i32 -1
+ ret i32 %r
+}
+
+define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
+; CHECK-LABEL: @uadd_sat_constant_vec(
+; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
+ %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
+ %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
+; CHECK-LABEL: @uadd_sat_constant_vec_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
+ %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
+ %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) {
+; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs(
+; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 undef>
+; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i32> [[X]], <i32 -43, i32 -43, i32 undef, i32 -43>
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> [[A]], <4 x i32> <i32 -1, i32 undef, i32 -1, i32 -1>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 undef>
+ %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 undef, i32 -43>
+ %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> <i32 -1, i32 undef, i32 -1, i32 -1>
+ ret <4 x i32> %r
+}
+
+declare i32 @get_i32()
+declare <2 x i8> @get_v2i8()
+
+define i32 @unsigned_sat_variable_using_min_add(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_min_add(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp ult i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %s, %y
+ ret i32 %r
+}
+
+define i32 @unsigned_sat_variable_using_min_commute_add(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp ult i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %y, %s
+ ret i32 %r
+}
+
+define <2 x i8> @unsigned_sat_variable_using_min_commute_select(<2 x i8> %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_select(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8()
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
+ %noty = xor <2 x i8> %y, <i8 -1, i8 -1>
+ %c = icmp ult <2 x i8> %noty, %x
+ %s = select <2 x i1> %c, <2 x i8> %noty, <2 x i8> %x
+ %r = add <2 x i8> %s, %y
+ ret <2 x i8> %r
+}
+
+define <2 x i8> @unsigned_sat_variable_using_min_commute_add_select(<2 x i8> %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_min_commute_add_select(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i8> @get_v2i8()
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %y = call <2 x i8> @get_v2i8() ; thwart complexity-based canonicalization
+ %noty = xor <2 x i8> %y, <i8 -1, i8 -1>
+ %c = icmp ult <2 x i8> %noty, %x
+ %s = select <2 x i1> %c, <2 x i8> %noty, <2 x i8> %x
+ %r = add <2 x i8> %y, %s
+ ret <2 x i8> %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_min(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_min(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y]], [[S]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp slt i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %y, %s
+ ret i32 %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_variable_using_wrong_value(i32 %x, i32 %z) {
+; CHECK-LABEL: @unsigned_sat_variable_using_wrong_value(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @get_i32()
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X:%.*]]
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 [[NOTY]]
+; CHECK-NEXT: [[R:%.*]] = add i32 [[S]], [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %y = call i32 @get_i32() ; thwart complexity-based canonicalization
+ %noty = xor i32 %y, -1
+ %c = icmp ult i32 %x, %noty
+ %s = select i1 %c, i32 %x, i32 %noty
+ %r = add i32 %z, %s
+ ret i32 %r
+}
+
+; If we have a constant operand, there's no commutativity variation.
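+; e.g., umin(x, 42) + (-43) is uadd.sat(x, -43): 42 == ~(-43), so if x was clamped
+; the sum is exactly -1, and otherwise the add cannot wrap.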
+
+define i32 @unsigned_sat_constant_using_min(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min(
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 -43)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %x, 42
+ %s = select i1 %c, i32 %x, i32 42
+ %r = add i32 %s, -43
+ ret i32 %r
+}
+
+define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_splat(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>)
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %c = icmp ult <2 x i32> %x, <i32 14, i32 14>
+ %s = select <2 x i1> %c, <2 x i32> %x, <2 x i32> <i32 14, i32 14>
+ %r = add <2 x i32> %s, <i32 -15, i32 -15>
+ ret <2 x i32> %r
+}
+
+; Negative test
+
+define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
+; CHECK-LABEL: @unsigned_sat_constant_using_min_wrong_constant(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[X:%.*]], 42
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 [[X]], i32 42
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[S]], -42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %x, 42
+ %s = select i1 %c, i32 %x, i32 42
+ %r = add i32 %s, -42
+ ret i32 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
new file mode 100644
index 00000000000..586509542b8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @extract_load(<4 x i32>* %p) {
+; CHECK-LABEL: @extract_load(
+; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %x = load <4 x i32>, <4 x i32>* %p, align 4
+ %ext = extractelement <4 x i32> %x, i32 1
+ ret i32 %ext
+}
+
+define double @extract_load_fp(<4 x double>* %p) {
+; CHECK-LABEL: @extract_load_fp(
+; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %x = load <4 x double>, <4 x double>* %p, align 32
+ %ext = extractelement <4 x double> %x, i32 3
+ ret double %ext
+}
+
+define double @extract_load_volatile(<4 x double>* %p) {
+; CHECK-LABEL: @extract_load_volatile(
+; CHECK-NEXT: [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %x = load volatile <4 x double>, <4 x double>* %p
+ %ext = extractelement <4 x double> %x, i32 2
+ ret double %ext
+}
+
+define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
+; CHECK-LABEL: @extract_load_extra_use(
+; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
+; CHECK-NEXT: store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %x = load <4 x double>, <4 x double>* %p, align 8
+ %ext = extractelement <4 x double> %x, i32 0
+ store <4 x double> %x, <4 x double>* %p2
+ ret double %ext
+}
+
+define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
+; CHECK-LABEL: @extract_load_variable_index(
+; CHECK-NEXT: [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %x = load <4 x double>, <4 x double>* %p
+ %ext = extractelement <4 x double> %x, i32 %y
+ ret double %ext
+}
+
+define void @scalarize_phi(i32 * %n, float * %inout) {
+; CHECK-LABEL: @scalarize_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_0]], [[T1]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.body:
+; CHECK-NEXT: store volatile float [[TMP0]], float* [[INOUT]], align 4
+; CHECK-NEXT: [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %t0 = load volatile float, float * %inout, align 4
+ %insert = insertelement <4 x float> undef, float %t0, i32 0
+ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
+ %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
+ br label %for.cond
+
+for.cond:
+ %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %t1 = load i32, i32 * %n, align 4
+ %cmp = icmp ne i32 %i.0, %t1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %t2 = extractelement <4 x float> %x.0, i32 1
+ store volatile float %t2, float * %inout, align 4
+ %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
+; CHECK-LABEL: @extract_element_binop_splat_constant_index(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
+; CHECK-NEXT: ret float [[R]]
+;
+ %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
+ %r = extractelement <4 x float> %b, i32 2
+ ret float %r
+}
+
+define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
+; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
+; CHECK-NEXT: [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
+; CHECK-NEXT: ret double [[R]]
+;
+ %b = fdiv <2 x double> <double 42.0, double undef>, %x
+ %r = extractelement <2 x double> %b, i32 0
+ ret double %r
+}
+
+define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
+; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; CHECK-NEXT: [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
+; CHECK-NEXT: ret float [[R]]
+;
+ %b = fmul <2 x float> %x, <float 42.0, float 43.0>
+ %r = extractelement <2 x float> %b, i32 1
+ ret float %r
+}
+
+define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
+; CHECK-LABEL: @extract_element_binop_splat_variable_index(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[TMP1]], 42
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
+ %r = extractelement <4 x i8> %b, i32 %y
+ ret i8 %r
+}
+
+define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
+; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
+; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
+ %r = extractelement <4 x i8> %b, i32 %y
+ ret i8 %r
+}
+
+define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
+; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
+; CHECK-NEXT: [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
+ %r = extractelement <4 x i8> %b, i32 %y
+ ret i8 %r
+}
+
+define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
+; CHECK-LABEL: @extract_element_load(
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %load = load <4 x float>, <4 x float>* %ptr
+ %add = fadd <4 x float> %x, %load
+ %r = extractelement <4 x float> %add, i32 2
+ ret float %r
+}
+
+define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
+; CHECK-LABEL: @extract_element_multi_Use_load(
+; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
+; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
+; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
+; CHECK-NEXT: ret float [[R]]
+;
+ %load = load <4 x float>, <4 x float>* %ptr0
+ store <4 x float> %load, <4 x float>* %ptr1
+ %add = fadd <4 x float> %x, %load
+ %r = extractelement <4 x float> %add, i32 2
+ ret float %r
+}
+
+define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
+; CHECK-LABEL: @extract_element_variable_index(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+ %r = extractelement <4 x float> %add, i32 %y
+ ret float %r
+}
+
+define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
+; CHECK-LABEL: @extelt_binop_insertelt(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
+; CHECK-NEXT: [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
+; CHECK-NEXT: ret float [[E]]
+;
+ %C = insertelement <4 x float> %A, float %f, i32 0
+ %D = fmul nnan <4 x float> %C, %B
+ %E = extractelement <4 x float> %D, i32 0
+ ret float %E
+}
+
+; We recurse to find a scalarizable operand.
+; FIXME: We should propagate the IR flags including wrapping flags.
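+; Here the extract is pushed through both binops, so %E becomes
+; (B[0] + %f) * B[0], but the nsw from the original mul is dropped.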
+
+define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
+; CHECK-LABEL: @extelt_binop_binop_insertelt(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
+; CHECK-NEXT: [[E:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %v = insertelement <4 x i32> %A, i32 %f, i32 0
+ %C = add <4 x i32> %v, %B
+ %D = mul nsw <4 x i32> %C, %B
+ %E = extractelement <4 x i32> %D, i32 0
+ ret i32 %E
+}
+
+define float @extract_element_constant_vector_variable_index(i32 %y) {
+; CHECK-LABEL: @extract_element_constant_vector_variable_index(
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
+; CHECK-NEXT: ret float [[R]]
+;
+ %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
+ ret float %r
+}
+
+define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
+; CHECK-LABEL: @cheap_to_extract_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cmp = icmp eq <4 x i32> %x, zeroinitializer
+ %and = and <4 x i1> %cmp, %y
+ %r = extractelement <4 x i1> %and, i32 2
+ ret i1 %r
+}
+
+define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
+; CHECK-LABEL: @cheap_to_extract_fcmp(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
+; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cmp = fcmp oeq <4 x float> %x, zeroinitializer
+ %and = and <4 x i1> %cmp, %y
+ %r = extractelement <4 x i1> %and, i32 2
+ ret i1 %r
+}
+
+define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
+; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
+; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[EXT]]
+;
+ %cmp = icmp eq <2 x i32> %arg, zeroinitializer
+ %ext = extractelement <2 x i1> %cmp, i32 0
+ ret i1 %ext
+}
+
+define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
+; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
+; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[EXT]]
+;
+ %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
+ %ext = extractelement <2 x i1> %cmp, i32 0
+ ret i1 %ext
+}
+
+define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
+; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT: [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[EXT]]
+;
+ %cmp = icmp eq <2 x i32> %arg, zeroinitializer
+ %ext = extractelement <2 x i1> %cmp, i32 %idx
+ ret i1 %ext
+}
+
+define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
+; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT: [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[EXT]]
+;
+ %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
+ %ext = extractelement <2 x i1> %cmp, i32 %idx
+ ret i1 %ext
+}
+
+define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
+; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
+; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
+; CHECK-NEXT: ret i1 [[EXT]]
+;
+ %add = fadd <2 x float> %arg1, %arg2
+ store volatile <2 x float> %add, <2 x float>* undef
+ %cmp = fcmp oeq <2 x float> %arg0, %add
+ %ext = extractelement <2 x i1> %cmp, i32 0
+ ret i1 %ext
+}
diff --git a/llvm/test/Transforms/InstCombine/sdiv-1.ll b/llvm/test/Transforms/InstCombine/sdiv-1.ll
new file mode 100644
index 00000000000..079d6e62954
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sdiv-1.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -inline -S | FileCheck %s
+; PR3142
+
+define i32 @a(i32 %X) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: [[T0:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[T0]], -3
+; CHECK-NEXT: ret i32 [[T1]]
+;
+ %t0 = sub i32 0, %X
+ %t1 = sdiv i32 %t0, -3
+ ret i32 %t1
+}
+
+define i32 @b(i32 %X) {
+; CHECK-LABEL: @b(
+; CHECK-NEXT: ret i32 715827882
+;
+ %t0 = call i32 @a(i32 -2147483648)
+ ret i32 %t0
+}
+
+define i32 @c(i32 %X) {
+; CHECK-LABEL: @c(
+; CHECK-NEXT: ret i32 715827882
+;
+ %t0 = sub i32 0, -2147483648
+ %t1 = sdiv i32 %t0, -3
+ ret i32 %t1
+}
diff --git a/llvm/test/Transforms/InstCombine/sdiv-2.ll b/llvm/test/Transforms/InstCombine/sdiv-2.ll
new file mode 100644
index 00000000000..0e4c0080201
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sdiv-2.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -disable-output
+; PR3144
+
+define fastcc i32 @func(i32 %length) nounwind {
+entry:
+ %0 = icmp ne i32 %length, -1 ; <i1> [#uses=1]
+ %iftmp.13.0 = select i1 %0, i128 0, i128 200000000 ; <i128> [#uses=2]
+ %1 = sdiv i128 %iftmp.13.0, 10 ; <i128> [#uses=1]
+ br label %bb5
+
+bb5: ; preds = %bb8, %entry
+ %v.0 = phi i128 [ 0, %entry ], [ %6, %bb8 ] ; <i128> [#uses=2]
+ %2 = icmp sgt i128 %v.0, %1 ; <i1> [#uses=1]
+ br i1 %2, label %overflow, label %bb7
+
+bb7: ; preds = %bb5
+ %3 = mul i128 %v.0, 10 ; <i128> [#uses=2]
+ %4 = sub i128 %iftmp.13.0, 0 ; <i128> [#uses=1]
+ %5 = icmp slt i128 %4, %3 ; <i1> [#uses=1]
+ br i1 %5, label %overflow, label %bb8
+
+bb8: ; preds = %bb7
+ %6 = add i128 0, %3 ; <i128> [#uses=1]
+ br label %bb5
+
+overflow: ; preds = %bb7, %bb5
+ ret i32 1
+}
diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll
new file mode 100644
index 00000000000..39ba5120ed6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test_sdiv_canonicalize_op0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_canonicalize_op0(
+; CHECK-NEXT: [[SDIV1:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sub nsw i32 0, [[SDIV1]]
+; CHECK-NEXT: ret i32 [[SDIV]]
+;
+ %neg = sub nsw i32 0, %x
+ %sdiv = sdiv i32 %neg, %y
+ ret i32 %sdiv
+}
+
+define i32 @test_sdiv_canonicalize_op0_exact(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_canonicalize_op0_exact(
+; CHECK-NEXT: [[SDIV1:%.*]] = sdiv exact i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sub nsw i32 0, [[SDIV1]]
+; CHECK-NEXT: ret i32 [[SDIV]]
+;
+ %neg = sub nsw i32 0, %x
+ %sdiv = sdiv exact i32 %neg, %y
+ ret i32 %sdiv
+}
+
+; (X/-Y) is not equal to -(X/Y), don't canonicalize.
+define i32 @test_sdiv_canonicalize_op1(i32 %x, i32 %z) {
+; CHECK-LABEL: @test_sdiv_canonicalize_op1(
+; CHECK-NEXT: [[Y:%.*]] = mul i32 [[Z:%.*]], 3
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sdiv i32 [[Y]], [[NEG]]
+; CHECK-NEXT: ret i32 [[SDIV]]
+;
+ %y = mul i32 %z, 3
+ %neg = sub nsw i32 0, %x
+ %sdiv = sdiv i32 %y, %neg
+ ret i32 %sdiv
+}
+
+define i32 @test_sdiv_canonicalize_nonsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_canonicalize_nonsw(
+; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sdiv i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SDIV]]
+;
+ %neg = sub i32 0, %x
+ %sdiv = sdiv i32 %neg, %y
+ ret i32 %sdiv
+}
+
+define <2 x i32> @test_sdiv_canonicalize_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test_sdiv_canonicalize_vec(
+; CHECK-NEXT: [[SDIV1:%.*]] = sdiv <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sub nsw <2 x i32> zeroinitializer, [[SDIV1]]
+; CHECK-NEXT: ret <2 x i32> [[SDIV]]
+;
+ %neg = sub nsw <2 x i32> <i32 0, i32 0>, %x
+ %sdiv = sdiv <2 x i32> %neg, %y
+ ret <2 x i32> %sdiv
+}
+
+define i32 @test_sdiv_canonicalize_multiple_uses(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_sdiv_canonicalize_multiple_uses(
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SDIV:%.*]] = sdiv i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[SDIV2:%.*]] = sdiv i32 [[SDIV]], [[NEG]]
+; CHECK-NEXT: ret i32 [[SDIV2]]
+;
+ %neg = sub nsw i32 0, %x
+ %sdiv = sdiv i32 %neg, %y
+ %sdiv2 = sdiv i32 %sdiv, %neg
+ ret i32 %sdiv2
+}
+
+; There is a combine: -(X/CE) -> (X/-CE).
+; If (X/-CE) gets combined back to -(X/CE), make sure we don't keep combining endlessly.
+
+@X = global i32 5
+
+define i64 @test_sdiv_canonicalize_constexpr(i64 %L1) {
+; Currently opt folds (sub nsw i64 0, constexpr) -> (sub i64 0, constexpr).
+; The sdiv canonicalization requires an nsw sub.
+; CHECK-LABEL: @test_sdiv_canonicalize_constexpr(
+; CHECK-NEXT: [[B4:%.*]] = sdiv i64 [[L1:%.*]], sub (i64 0, i64 ptrtoint (i32* @X to i64))
+; CHECK-NEXT: ret i64 [[B4]]
+;
+ %v1 = ptrtoint i32* @X to i64
+ %B8 = sub nsw i64 0, %v1
+ %B4 = sdiv i64 %L1, %B8
+ ret i64 %B4
+}
diff --git a/llvm/test/Transforms/InstCombine/sdiv-guard.ll b/llvm/test/Transforms/InstCombine/sdiv-guard.ll
new file mode 100644
index 00000000000..e861fcb6efa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sdiv-guard.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; Regression test. If %flag is false then %s == 0 and the guard should be triggered.
+define i32 @a(i1 %flag, i32 %X) nounwind readnone {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[FLAG:%.*]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CMP]]) #1 [ "deopt"() ]
+; CHECK-NEXT: [[R:%.*]] = sdiv i32 100, [[X]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %s = select i1 %flag, i32 %X, i32 0
+ %cmp = icmp ne i32 %s, 0
+ call void(i1, ...) @llvm.experimental.guard( i1 %cmp )[ "deopt"() ]
+ %r = sdiv i32 100, %s
+ ret i32 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/select-2.ll b/llvm/test/Transforms/InstCombine/select-2.ll
new file mode 100644
index 00000000000..832d958c5f3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: select
+; CHECK: select
+
+; Make sure instcombine doesn't fold the select into its operands. We don't want
+; to emit a select of two integers unless it's selecting 0 / 1.
+
+define i32 @t1(i32 %c, i32 %x) nounwind {
+ %t1 = icmp eq i32 %c, 0
+ %t2 = lshr i32 %x, 18
+ %t3 = select i1 %t1, i32 %t2, i32 %x
+ ret i32 %t3
+}
+
+define i32 @t2(i32 %c, i32 %x) nounwind {
+ %t1 = icmp eq i32 %c, 0
+ %t2 = and i32 %x, 18
+ %t3 = select i1 %t1, i32 %t2, i32 %x
+ ret i32 %t3
+}
+
+define float @t3(float %x, float %y) nounwind {
+ %t1 = fcmp ogt float %x, %y
+ %t2 = select i1 %t1, float %x, float 1.0
+ %t3 = fadd fast float %t2, 1.0
+ ret float %t3
+; CHECK-LABEL: @t3(
+; CHECK: fadd fast
+; CHECK: select
+}
diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
new file mode 100644
index 00000000000..a473acd7304
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
@@ -0,0 +1,1088 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(<2 x i1>)
+declare void @use2(i1)
+
+define i32 @select_xor_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_meta(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_meta(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]], !prof !0
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y, !prof !0
+ ret i32 %C
+}
+
+define i32 @select_mul_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_mul_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = mul i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_add_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = add i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = or i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], -1
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, -1
+ %B = and i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 0, i8 0>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_use(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_use(
+; CHECK-NEXT: [[A:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: call void @use(<2 x i1> [[A]])
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp ne <2 x i8> %x, <i8 0, i8 0>
+ call void @use(<2 x i1> %A)
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %y, <2 x i8> %B
+ ret <2 x i8> %C
+}
+
+define i32 @select_xor_inv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_inv_icmp2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_inv_icmp2(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A) ; thwart predicate canonicalization
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
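+; FP variants: the same fold applies to fadd/fsub/fmul/fdiv with the matching FP identity
+; constant, but only when signed zeros do not matter ('nsz') or Z is known not to be -0.0.
+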
+define float @select_fadd_fcmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_poszero(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fadd_fcmp_2(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_2(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[Z]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %z = fadd float %v, 0.0 ; cannot produce -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_2_poszero(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_2_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[Z]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, 0.0
+ %z = fadd float %v, 0.0 ; cannot produce -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+define float @select_fadd_fcmp_3(float %x, float %y) {
+; CHECK-LABEL: @select_fadd_fcmp_3(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float 6.000000e+00
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %B = fadd float 6.0, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_3_poszero(float %x, float %y) {
+; CHECK-LABEL: @select_fadd_fcmp_3_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float 6.000000e+00
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, 0.0
+ %B = fadd float 6.0, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+define float @select_fadd_fcmp_4(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_4(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[Z:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_4_poszero(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_4_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[Z:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, 0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+define float @select_fadd_fcmp_5(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_5(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %z = fadd float %v, 0.0 ; cannot produce -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_5_poszero(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_5_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 0.0
+ %z = fadd float %v, 0.0 ; cannot produce -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fadd_fcmp_6(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_6(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float 6.000000e+00, float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd float %x, 6.0
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fadd_fcmp_6_poszero(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_6_poszero(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float 6.000000e+00, float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 0.0
+ %B = fadd float %x, 6.0
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fmul_fcmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fmul_fcmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fmul nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fsub_fcmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fsub_fcmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 0.0
+ %B = fsub nsz float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; This is logically equivalent to the previous test - fcmp ignores the sign of 0.0.
+
+define float @select_fsub_fcmp_negzero(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fsub_fcmp_negzero(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fsub nsz float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fdiv_fcmp(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fdiv_fcmp(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Z:%.*]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fdiv nsz float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define i32 @select_sub_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ call void @use2(i1 %A)
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_3(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A)
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define <2 x i8> @select_sub_icmp_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_sub_icmp_vec(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 0, i8 0>
+ %B = sub <2 x i8> %z, %x
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+define i32 @select_shl_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_shl_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A) ; thwart predicate canonicalization
+ %B = shl i32 %z, %x
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define i32 @select_lshr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_lshr_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = lshr i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_ashr_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_ashr_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A) ; thwart predicate canonicalization
+ %B = ashr i32 %z, %x
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define i32 @select_udiv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_udiv_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Z:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = udiv i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sdiv_icmp(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sdiv_icmp(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 1
+ call void @use2(i1 %A) ; thwart predicate canonicalization
+ %B = sdiv i32 %z, %x
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+; Negative tests
+define i32 @select_xor_icmp_bad_1(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_1(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_2(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_xor_icmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[K:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = xor i32 %k, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_fcmp_bad_4(i32 %x, i32 %y, i32 %z, float %k) {
+; CHECK-LABEL: @select_xor_fcmp_bad_4(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[K:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = fcmp oeq float %k, 0.0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_5(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_5(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[Y:%.*]], i32 [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_xor_icmp_bad_6(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 1
+ %B = xor i32 %x, %z
+ %C = select i1 %A, i32 %y, i32 %B
+ ret i32 %C
+}
+
+define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 5, i8 3>
+; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 5, i8 3>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+; TODO: support undefs; the check for an identity constant does not handle them yet.
+define <2 x i8> @select_xor_icmp_vec_bad_2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @select_xor_icmp_vec_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 0, i8 undef>
+; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[C]]
+;
+ %A = icmp eq <2 x i8> %x, <i8 0, i8 undef>
+ %B = xor <2 x i8> %x, %z
+ %C = select <2 x i1> %A, <2 x i8> %B, <2 x i8> %y
+ ret <2 x i8> %C
+}
+
+define i32 @select_mul_icmp_bad(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_mul_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = mul i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = mul i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_add_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_add_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = add i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = add i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_and_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_and_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = and i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = and i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_or_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_or_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = or i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = or i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+; Invalid identity constant for FP op
+define float @select_fadd_fcmp_bad(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -1.0
+ %B = fadd nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = fcmp ueq float [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp ueq float %x, -1.0
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_3(float %x, float %y, float %z, float %k) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_3(
+; CHECK-NEXT: [[A:%.*]] = fcmp one float [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp one float %x, %k
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid order of operands of select
+define float @select_fadd_fcmp_bad_4(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_4(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_5(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_5(
+; CHECK-NEXT: [[A:%.*]] = fcmp one float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp one float %x, -0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid order of operands of select
+define float @select_fadd_fcmp_bad_6(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_6(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Do not transform if we have signed zeros and if Z is possibly negative zero
+define float @select_fadd_fcmp_bad_7(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_7(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_8(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_8(
+; CHECK-NEXT: [[A:%.*]] = fcmp one float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], -1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[Z]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp one float %x, -0.0
+ %z = fadd float %v, -1.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_9(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_9(
+; CHECK-NEXT: [[A:%.*]] = fcmp one float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp one float %x, -0.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid comparison type
+define float @select_fadd_fcmp_bad_10(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_10(
+; CHECK-NEXT: [[A:%.*]] = fcmp one float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[Z]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp one float %x, -0.0
+ %z = fadd float %v, 0.0 ; cannot produce -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Do not transform if Z is possibly negative zero
+define float @select_fadd_fcmp_bad_11(float %x, float %y, float %v) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_11(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[Z:%.*]] = fadd float [[V:%.*]], -1.000000e+00
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[Z]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %z = fadd float %v, -1.0
+ %B = fadd nsz float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Do not transform if we have signed zeros and if Z is possibly negative zero
+define float @select_fadd_fcmp_bad_12(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_12(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -0.0
+ %B = fadd float %z, %x
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid order of operands of select
+define float @select_fadd_fcmp_bad_13(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_13(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, -0.0
+ %B = fadd nsz float %x, %z
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+; Invalid identity constant for FP op
+define float @select_fadd_fcmp_bad_14(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fadd_fcmp_bad_14(
+; CHECK-NEXT: [[A:%.*]] = fcmp une float [[X:%.*]], -1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fadd nsz float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[Y:%.*]], float [[B]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp une float %x, -1.0
+ %B = fadd nsz float %x, %z
+ %C = select i1 %A, float %y, float %B
+ ret float %C
+}
+
+define float @select_fmul_fcmp_bad(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fmul_fcmp_bad(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fmul nsz float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 3.0
+ %B = fmul nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fmul_fcmp_bad_2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fmul_fcmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fmul float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fmul float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fmul_icmp_bad(float %x, float %y, float %z, i32 %k) {
+; CHECK-LABEL: @select_fmul_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[K:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = fmul float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = icmp eq i32 %k, 0
+ %B = fmul float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fmul_icmp_bad_2(float %x, float %y, float %z, i32 %k) {
+; CHECK-LABEL: @select_fmul_icmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[K:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = fmul nsz float [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = icmp eq i32 %k, 0
+ %B = fmul nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fdiv_fcmp_bad(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fdiv_fcmp_bad(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fdiv float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fdiv float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fdiv_fcmp_bad_2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fdiv_fcmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fdiv nsz float [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 3.0
+ %B = fdiv nsz float %x, %z
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+; The transform is not valid when x = -0.0 and z = -0.0
+; (optimized code would return -0.0, but this returns +0.0).
+
+define float @select_fsub_fcmp_bad(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fsub_fcmp_bad(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fsub float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 0.0
+ %B = fsub float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define float @select_fsub_fcmp_bad_2(float %x, float %y, float %z) {
+; CHECK-LABEL: @select_fsub_fcmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = fcmp oeq float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[B:%.*]] = fsub nsz float [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], float [[B]], float [[Y:%.*]]
+; CHECK-NEXT: ret float [[C]]
+;
+ %A = fcmp oeq float %x, 1.0
+ %B = fsub nsz float %z, %x
+ %C = select i1 %A, float %B, float %y
+ ret float %C
+}
+
+define i32 @select_sub_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[X]], [[Z:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = sub i32 %x, %z
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_bad_2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_bad_2(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_bad_3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_bad_3(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A)
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sub_icmp_4(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: call void @use2(i1 [[A]])
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp ne i32 %x, 0
+ call void @use2(i1 %A)
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_bad_4(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_sub_icmp_bad_4(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[Z:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = sub i32 %z, %k
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sub_icmp_bad_5(i32 %x, i32 %y, i32 %z, i32 %k) {
+; CHECK-LABEL: @select_sub_icmp_bad_5(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], [[K:%.*]]
+; CHECK-NEXT: [[B:%.*]] = sub i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, %k
+ %B = sub i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_shl_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_shl_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = shl i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_lshr_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_lshr_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = lshr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = lshr i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_ashr_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_ashr_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[B:%.*]] = ashr i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 1
+ %B = ashr i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_udiv_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_udiv_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = udiv i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = udiv i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+define i32 @select_sdiv_icmp_bad(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @select_sdiv_icmp_bad(
+; CHECK-NEXT: [[A:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[B:%.*]] = sdiv i32 [[Z:%.*]], [[X]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[A]], i32 [[B]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = icmp eq i32 %x, 3
+ %B = sdiv i32 %z, %x
+ %C = select i1 %A, i32 %B, i32 %y
+ ret i32 %C
+}
+
+!0 = !{!"branch_weights", i32 2, i32 10}
diff --git a/llvm/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll
new file mode 100644
index 00000000000..25aadd65142
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
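+; In these tests the false arm is (ashr %y, Amt) where Amt is zero exactly when the select
+; condition is true, so the select is redundant. The CHECK lines record which of these forms
+; InstCombine currently simplifies.
+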
+define i64 @sel_false_val_is_a_masked_shl_of_true_val1(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_shl_of_true_val1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl nuw nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @sel_false_val_is_a_masked_shl_of_true_val2(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_shl_of_true_val2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 60
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl nuw nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %2, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @sel_false_val_is_a_masked_lshr_of_true_val1(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_lshr_of_true_val1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 60
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 60
+ %2 = lshr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @sel_false_val_is_a_masked_lshr_of_true_val2(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_lshr_of_true_val2(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 15
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, 60
+ %2 = lshr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %2, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @sel_false_val_is_a_masked_ashr_of_true_val1(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_ashr_of_true_val1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483588
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, -2147483588
+ %2 = ashr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @sel_false_val_is_a_masked_ashr_of_true_val2(i32 %x, i64 %y) {
+; CHECK-LABEL: @sel_false_val_is_a_masked_ashr_of_true_val2(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -536870897
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, -2147483588
+ %2 = ashr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %2, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
diff --git a/llvm/test/Transforms/InstCombine/select-bitext.ll b/llvm/test/Transforms/InstCombine/select-bitext.ll
new file mode 100644
index 00000000000..d44be273573
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-bitext.ll
@@ -0,0 +1,619 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Widen a select of constants to eliminate an extend.
+
+define i16 @sel_sext_constants(i1 %cmp) {
+; CHECK-LABEL: @sel_sext_constants(
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i16 -1, i16 42
+; CHECK-NEXT: ret i16 [[EXT]]
+;
+ %sel = select i1 %cmp, i8 255, i8 42
+ %ext = sext i8 %sel to i16
+ ret i16 %ext
+}
+
+define i16 @sel_zext_constants(i1 %cmp) {
+; CHECK-LABEL: @sel_zext_constants(
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i16 255, i16 42
+; CHECK-NEXT: ret i16 [[EXT]]
+;
+ %sel = select i1 %cmp, i8 255, i8 42
+ %ext = zext i8 %sel to i16
+ ret i16 %ext
+}
+
+define double @sel_fpext_constants(i1 %cmp) {
+; CHECK-LABEL: @sel_fpext_constants(
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], double -2.550000e+02, double 4.200000e+01
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %sel = select i1 %cmp, float -255.0, float 42.0
+ %ext = fpext float %sel to double
+ ret double %ext
+}
+
+; FIXME: We should not grow the size of the select in the next 4 cases.
+
+define i64 @sel_sext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @sel_sext(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i64 [[TMP1]], i64 42
+; CHECK-NEXT: ret i64 [[EXT]]
+;
+ %sel = select i1 %cmp, i32 %a, i32 42
+ %ext = sext i32 %sel to i64
+ ret i64 %ext
+}
+
+define <4 x i64> @sel_sext_vec(<4 x i32> %a, <4 x i1> %cmp) {
+; CHECK-LABEL: @sel_sext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64>
+; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT: ret <4 x i64> [[EXT]]
+;
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+ %ext = sext <4 x i32> %sel to <4 x i64>
+ ret <4 x i64> %ext
+}
+
+define i64 @sel_zext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @sel_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i64 [[TMP1]], i64 42
+; CHECK-NEXT: ret i64 [[EXT]]
+;
+ %sel = select i1 %cmp, i32 %a, i32 42
+ %ext = zext i32 %sel to i64
+ ret i64 %ext
+}
+
+define <4 x i64> @sel_zext_vec(<4 x i32> %a, <4 x i1> %cmp) {
+; CHECK-LABEL: @sel_zext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[A:%.*]] to <4 x i64>
+; CHECK-NEXT: [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT: ret <4 x i64> [[EXT]]
+;
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+ %ext = zext <4 x i32> %sel to <4 x i64>
+ ret <4 x i64> %ext
+}
+
+; FIXME: The next 18 tests cycle through trunc+select and {larger,smaller,equal} {sext,zext,fpext} {scalar,vector}.
+; The only cases where we eliminate an instruction are equal zext with scalar/vector, so that's probably the only
+; way to justify widening the select.
+
+define i64 @trunc_sel_larger_sext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_sext(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TRUNC]] to i64
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i64 [[TMP1]], i64 42
+; CHECK-NEXT: ret i64 [[EXT]]
+;
+ %trunc = trunc i32 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = sext i16 %sel to i64
+ ret i64 %ext
+}
+
+define <2 x i64> @trunc_sel_larger_sext_vec(<2 x i32> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_sext_vec(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i64>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> <i64 42, i64 43>
+; CHECK-NEXT: ret <2 x i64> [[EXT]]
+;
+ %trunc = trunc <2 x i32> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = sext <2 x i16> %sel to <2 x i64>
+ ret <2 x i64> %ext
+}
+
+define i32 @trunc_sel_smaller_sext(i64 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_sext(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[A:%.*]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TRUNC]] to i32
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i32 [[TMP1]], i32 42
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %trunc = trunc i64 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = sext i16 %sel to i32
+ ret i32 %ext
+}
+
+define <2 x i32> @trunc_sel_smaller_sext_vec(<2 x i64> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_sext_vec(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i32>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT: ret <2 x i32> [[EXT]]
+;
+ %trunc = trunc <2 x i64> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = sext <2 x i16> %sel to <2 x i32>
+ ret <2 x i32> %ext
+}
+
+define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_sext(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A:%.*]], 16
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 16
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i32 [[TMP2]], i32 42
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %trunc = trunc i32 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = sext i16 %sel to i32
+ ret i32 %ext
+}
+
+define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_sext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT: ret <2 x i32> [[EXT]]
+;
+ %trunc = trunc <2 x i32> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = sext <2 x i16> %sel to <2 x i32>
+ ret <2 x i32> %ext
+}
+
+define i64 @trunc_sel_larger_zext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_zext(
+; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and i32 [[A:%.*]], 65535
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TRUNC_MASK]] to i64
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i64 [[TMP1]], i64 42
+; CHECK-NEXT: ret i64 [[EXT]]
+;
+ %trunc = trunc i32 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = zext i16 %sel to i64
+ ret i64 %ext
+}
+
+define <2 x i64> @trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_zext_vec(
+; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[TRUNC_MASK]] to <2 x i64>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> <i64 42, i64 43>
+; CHECK-NEXT: ret <2 x i64> [[EXT]]
+;
+ %trunc = trunc <2 x i32> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = zext <2 x i16> %sel to <2 x i64>
+ ret <2 x i64> %ext
+}
+
+define i32 @trunc_sel_smaller_zext(i64 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 65535
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i32 [[TMP2]], i32 42
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %trunc = trunc i64 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = zext i16 %sel to i32
+ ret i32 %ext
+}
+
+define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_zext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT: ret <2 x i32> [[EXT]]
+;
+ %trunc = trunc <2 x i64> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = zext <2 x i16> %sel to <2 x i32>
+ ret <2 x i32> %ext
+}
+
+define i32 @trunc_sel_equal_zext(i32 %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 65535
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], i32 [[TMP1]], i32 42
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %trunc = trunc i32 %a to i16
+ %sel = select i1 %cmp, i16 %trunc, i16 42
+ %ext = zext i16 %sel to i32
+ ret i32 %ext
+}
+
+define <2 x i32> @trunc_sel_equal_zext_vec(<2 x i32> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_zext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT: ret <2 x i32> [[EXT]]
+;
+ %trunc = trunc <2 x i32> %a to <2 x i16>
+ %sel = select <2 x i1> %cmp, <2 x i16> %trunc, <2 x i16> <i16 42, i16 43>
+ %ext = zext <2 x i16> %sel to <2 x i32>
+ ret <2 x i32> %ext
+}
+
+define double @trunc_sel_larger_fpext(float %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_fpext(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TRUNC]] to double
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], double [[TMP1]], double 4.200000e+01
+; CHECK-NEXT: ret double [[EXT]]
+;
+ %trunc = fptrunc float %a to half
+ %sel = select i1 %cmp, half %trunc, half 42.0
+ %ext = fpext half %sel to double
+ ret double %ext
+}
+
+define <2 x double> @trunc_sel_larger_fpext_vec(<2 x float> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_larger_fpext_vec(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TRUNC]] to <2 x double>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x double> [[TMP1]], <2 x double> <double 4.200000e+01, double 4.300000e+01>
+; CHECK-NEXT: ret <2 x double> [[EXT]]
+;
+ %trunc = fptrunc <2 x float> %a to <2 x half>
+ %sel = select <2 x i1> %cmp, <2 x half> %trunc, <2 x half> <half 42.0, half 43.0>
+ %ext = fpext <2 x half> %sel to <2 x double>
+ ret <2 x double> %ext
+}
+
+define float @trunc_sel_smaller_fpext(double %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_fpext(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc double [[A:%.*]] to half
+; CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TRUNC]] to float
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], float [[TMP1]], float 4.200000e+01
+; CHECK-NEXT: ret float [[EXT]]
+;
+ %trunc = fptrunc double %a to half
+ %sel = select i1 %cmp, half %trunc, half 42.0
+ %ext = fpext half %sel to float
+ ret float %ext
+}
+
+define <2 x float> @trunc_sel_smaller_fpext_vec(<2 x double> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_smaller_fpext_vec(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x double> [[A:%.*]] to <2 x half>
+; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TRUNC]] to <2 x float>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x float> [[TMP1]], <2 x float> <float 4.200000e+01, float 4.300000e+01>
+; CHECK-NEXT: ret <2 x float> [[EXT]]
+;
+ %trunc = fptrunc <2 x double> %a to <2 x half>
+ %sel = select <2 x i1> %cmp, <2 x half> %trunc, <2 x half> <half 42.0, half 43.0>
+ %ext = fpext <2 x half> %sel to <2 x float>
+ ret <2 x float> %ext
+}
+
+define float @trunc_sel_equal_fpext(float %a, i1 %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_fpext(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TRUNC]] to float
+; CHECK-NEXT: [[EXT:%.*]] = select i1 [[CMP:%.*]], float [[TMP1]], float 4.200000e+01
+; CHECK-NEXT: ret float [[EXT]]
+;
+ %trunc = fptrunc float %a to half
+ %sel = select i1 %cmp, half %trunc, half 42.0
+ %ext = fpext half %sel to float
+ ret float %ext
+}
+
+define <2 x float> @trunc_sel_equal_fpext_vec(<2 x float> %a, <2 x i1> %cmp) {
+; CHECK-LABEL: @trunc_sel_equal_fpext_vec(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TRUNC]] to <2 x float>
+; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x float> [[TMP1]], <2 x float> <float 4.200000e+01, float 4.300000e+01>
+; CHECK-NEXT: ret <2 x float> [[EXT]]
+;
+ %trunc = fptrunc <2 x float> %a to <2 x half>
+ %sel = select <2 x i1> %cmp, <2 x half> %trunc, <2 x half> <half 42.0, half 43.0>
+ %ext = fpext <2 x half> %sel to <2 x float>
+ ret <2 x float> %ext
+}
+
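+; Fold a select between a sign/zero-extended i1 and a matching constant (0, -1, or 1) into a
+; logic op on the i1 values, so only the narrow result is extended.
+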
+define i32 @test_sext1(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_sext1(
+; CHECK-NEXT: [[NARROW:%.*]] = and i1 [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = sext i1 %cca to i32
+ %r = select i1 %ccb, i32 %ccax, i32 0
+ ret i32 %r
+}
+
+define i32 @test_sext2(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_sext2(
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = sext i1 %cca to i32
+ %r = select i1 %ccb, i32 -1, i32 %ccax
+ ret i32 %r
+}
+
+define i32 @test_sext3(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_sext3(
+; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 [[CCB:%.*]], true
+; CHECK-NEXT: [[NARROW:%.*]] = and i1 [[NOT_CCB]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = sext i1 %cca to i32
+ %r = select i1 %ccb, i32 0, i32 %ccax
+ ret i32 %r
+}
+
+define i32 @test_sext4(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_sext4(
+; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 [[CCB:%.*]], true
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[NOT_CCB]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = sext i1 %cca to i32
+ %r = select i1 %ccb, i32 %ccax, i32 -1
+ ret i32 %r
+}
+
+define i32 @test_zext1(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_zext1(
+; CHECK-NEXT: [[NARROW:%.*]] = and i1 [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = zext i1 %cca to i32
+ %r = select i1 %ccb, i32 %ccax, i32 0
+ ret i32 %r
+}
+
+define i32 @test_zext2(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_zext2(
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = zext i1 %cca to i32
+ %r = select i1 %ccb, i32 1, i32 %ccax
+ ret i32 %r
+}
+
+define i32 @test_zext3(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_zext3(
+; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 [[CCB:%.*]], true
+; CHECK-NEXT: [[NARROW:%.*]] = and i1 [[NOT_CCB]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = zext i1 %cca to i32
+ %r = select i1 %ccb, i32 0, i32 %ccax
+ ret i32 %r
+}
+
+define i32 @test_zext4(i1 %cca, i1 %ccb) {
+; CHECK-LABEL: @test_zext4(
+; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 [[CCB:%.*]], true
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[NOT_CCB]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[NARROW]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ccax = zext i1 %cca to i32
+ %r = select i1 %ccb, i32 %ccax, i32 1
+ ret i32 %r
+}
+
+define i32 @test_negative_sext(i1 %a, i1 %cc) {
+; CHECK-LABEL: @test_negative_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i1 [[A:%.*]] to i32
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], i32 [[A_EXT]], i32 1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a.ext = sext i1 %a to i32
+ %r = select i1 %cc, i32 %a.ext, i32 1
+ ret i32 %r
+}
+
+define i32 @test_negative_zext(i1 %a, i1 %cc) {
+; CHECK-LABEL: @test_negative_zext(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext i1 [[A:%.*]] to i32
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], i32 [[A_EXT]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a.ext = zext i1 %a to i32
+ %r = select i1 %cc, i32 %a.ext, i32 -1
+ ret i32 %r
+}
+
+define i32 @test_bits_sext(i8 %a, i1 %cc) {
+; CHECK-LABEL: @test_bits_sext(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], i32 [[A_EXT]], i32 -128
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a.ext = sext i8 %a to i32
+ %r = select i1 %cc, i32 %a.ext, i32 -128
+ ret i32 %r
+}
+
+define i32 @test_bits_zext(i8 %a, i1 %cc) {
+; CHECK-LABEL: @test_bits_zext(
+; CHECK-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], i32 [[A_EXT]], i32 255
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a.ext = zext i8 %a to i32
+ %r = select i1 %cc, i32 %a.ext, i32 255
+ ret i32 %r
+}
+
+define i32 @test_op_op(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @test_op_op(
+; CHECK-NEXT: [[CCA:%.*]] = icmp sgt i32 [[A:%.*]], 0
+; CHECK-NEXT: [[CCB:%.*]] = icmp sgt i32 [[B:%.*]], 0
+; CHECK-NEXT: [[CCC:%.*]] = icmp sgt i32 [[C:%.*]], 0
+; CHECK-NEXT: [[R_V:%.*]] = select i1 [[CCC]], i1 [[CCA]], i1 [[CCB]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[R_V]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cca = icmp sgt i32 %a, 0
+ %ccax = sext i1 %cca to i32
+ %ccb = icmp sgt i32 %b, 0
+ %ccbx = sext i1 %ccb to i32
+ %ccc = icmp sgt i32 %c, 0
+ %r = select i1 %ccc, i32 %ccax, i32 %ccbx
+ ret i32 %r
+}
+
+define <2 x i32> @test_vectors_sext(<2 x i1> %cca, <2 x i1> %ccb) {
+; CHECK-LABEL: @test_vectors_sext(
+; CHECK-NEXT: [[NARROW:%.*]] = and <2 x i1> [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = sext <2 x i1> %cca to <2 x i32>
+ %r = select <2 x i1> %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @test_vectors_sext_nonsplat(<2 x i1> %cca, <2 x i1> %ccb) {
+; CHECK-LABEL: @test_vectors_sext_nonsplat(
+; CHECK-NEXT: [[NARROW:%.*]] = select <2 x i1> [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> <i1 false, i1 true>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = sext <2 x i1> %cca to <2 x i32>
+ %r = select <2 x i1> %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 -1>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @test_vectors_zext(<2 x i1> %cca, <2 x i1> %ccb) {
+; CHECK-LABEL: @test_vectors_zext(
+; CHECK-NEXT: [[NARROW:%.*]] = and <2 x i1> [[CCB:%.*]], [[CCA:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = zext <2 x i1> %cca to <2 x i32>
+ %r = select <2 x i1> %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @test_vectors_zext_nonsplat(<2 x i1> %cca, <2 x i1> %ccb) {
+; CHECK-LABEL: @test_vectors_zext_nonsplat(
+; CHECK-NEXT: [[NARROW:%.*]] = select <2 x i1> [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> <i1 true, i1 false>
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = zext <2 x i1> %cca to <2 x i32>
+ %r = select <2 x i1> %ccb, <2 x i32> %ccax, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @scalar_select_of_vectors_sext(<2 x i1> %cca, i1 %ccb) {
+; CHECK-LABEL: @scalar_select_of_vectors_sext(
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = sext <2 x i1> %cca to <2 x i32>
+ %r = select i1 %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @scalar_select_of_vectors_zext(<2 x i1> %cca, i1 %ccb) {
+; CHECK-LABEL: @scalar_select_of_vectors_zext(
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[CCB:%.*]], <2 x i1> [[CCA:%.*]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = zext <2 x i1> %cca to <2 x i32>
+ %r = select i1 %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %r
+}
+
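+; When the select condition is the same i1 value that is being extended, the
+; extend in that arm can be replaced by the constant it must produce: all-ones
+; (sext) or one (zext) in the true arm, zero in the false arm. Branch weights
+; (!prof) are preserved.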
+define i32 @sext_true_val_must_be_all_ones(i1 %x) {
+; CHECK-LABEL: @sext_true_val_must_be_all_ones(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[X:%.*]], i32 -1, i32 42, !prof !0
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %ext = sext i1 %x to i32
+ %sel = select i1 %x, i32 %ext, i32 42, !prof !0
+ ret i32 %sel
+}
+
+define <2 x i32> @sext_true_val_must_be_all_ones_vec(<2 x i1> %x) {
+; CHECK-LABEL: @sext_true_val_must_be_all_ones_vec(
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 42, i32 12>, !prof !0
+; CHECK-NEXT: ret <2 x i32> [[SEL]]
+;
+ %ext = sext <2 x i1> %x to <2 x i32>
+ %sel = select <2 x i1> %x, <2 x i32> %ext, <2 x i32> <i32 42, i32 12>, !prof !0
+ ret <2 x i32> %sel
+}
+
+define i32 @zext_true_val_must_be_one(i1 %x) {
+; CHECK-LABEL: @zext_true_val_must_be_one(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[X:%.*]], i32 1, i32 42, !prof !0
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %ext = zext i1 %x to i32
+ %sel = select i1 %x, i32 %ext, i32 42, !prof !0
+ ret i32 %sel
+}
+
+define <2 x i32> @zext_true_val_must_be_one_vec(<2 x i1> %x) {
+; CHECK-LABEL: @zext_true_val_must_be_one_vec(
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 42, i32 12>, !prof !0
+; CHECK-NEXT: ret <2 x i32> [[SEL]]
+;
+ %ext = zext <2 x i1> %x to <2 x i32>
+ %sel = select <2 x i1> %x, <2 x i32> %ext, <2 x i32> <i32 42, i32 12>, !prof !0
+ ret <2 x i32> %sel
+}
+
+define i32 @sext_false_val_must_be_zero(i1 %x) {
+; CHECK-LABEL: @sext_false_val_must_be_zero(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[X:%.*]], i32 42, i32 0, !prof !0
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %ext = sext i1 %x to i32
+ %sel = select i1 %x, i32 42, i32 %ext, !prof !0
+ ret i32 %sel
+}
+
+define <2 x i32> @sext_false_val_must_be_zero_vec(<2 x i1> %x) {
+; CHECK-LABEL: @sext_false_val_must_be_zero_vec(
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 42, i32 12>, <2 x i32> zeroinitializer, !prof !0
+; CHECK-NEXT: ret <2 x i32> [[SEL]]
+;
+ %ext = sext <2 x i1> %x to <2 x i32>
+ %sel = select <2 x i1> %x, <2 x i32> <i32 42, i32 12>, <2 x i32> %ext, !prof !0
+ ret <2 x i32> %sel
+}
+
+define i32 @zext_false_val_must_be_zero(i1 %x) {
+; CHECK-LABEL: @zext_false_val_must_be_zero(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[X:%.*]], i32 42, i32 0, !prof !0
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %ext = zext i1 %x to i32
+ %sel = select i1 %x, i32 42, i32 %ext, !prof !0
+ ret i32 %sel
+}
+
+define <2 x i32> @zext_false_val_must_be_zero_vec(<2 x i1> %x) {
+; CHECK-LABEL: @zext_false_val_must_be_zero_vec(
+; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 42, i32 12>, <2 x i32> zeroinitializer, !prof !0
+; CHECK-NEXT: ret <2 x i32> [[SEL]]
+;
+ %ext = zext <2 x i1> %x to <2 x i32>
+ %sel = select <2 x i1> %x, <2 x i32> <i32 42, i32 12>, <2 x i32> %ext, !prof !0
+ ret <2 x i32> %sel
+}
+
+!0 = !{!"branch_weights", i32 3, i32 5}
+
diff --git a/llvm/test/Transforms/InstCombine/select-cmp-br.ll b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
new file mode 100644
index 00000000000..06f32828c96
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Replace a 'select' with an 'or' in a 'select - cmp [eq|ne] - br' sequence.
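+; A rough sketch of the rewrite, with illustrative value names (test1 below
+; checks the exact pattern):
+;   %sel    = select i1 %cmp, %C* %arg, %C* null
+;   %isnull = icmp eq %C* %sel, null
+;   br i1 %isnull, label %bb10, label %bb8
+; becomes
+;   %notcmp = icmp ne ...                       ; %cmp inverted
+;   %isnull = icmp eq %C* %arg, null
+;   %or     = or i1 %isnull, %notcmp
+;   br i1 %or, label %bb10, label %bb8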
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct.S = type { i64*, i32, i32 }
+%C = type <{ %struct.S }>
+
+declare void @bar(%struct.S*)
+declare void @foobar()
+
+define void @test1(%C* %arg) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT: [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: ret void
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT: tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT: [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT: br label [[BB]]
+;
+entry:
+ %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+ %m = load i64*, i64** %tmp, align 8
+ %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+ %n = load i64*, i64** %tmp1, align 8
+ %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+ %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+ %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+ %tmp5 = icmp eq i64* %m, %n
+ %tmp6 = select i1 %tmp5, %C* %arg, %C* null
+ %tmp7 = icmp eq %C* %tmp6, null
+ br i1 %tmp7, label %bb10, label %bb8
+
+bb: ; preds = %bb10, %bb8
+ ret void
+
+bb8: ; preds = %entry
+ %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+ tail call void @bar(%struct.S* %tmp9)
+ br label %bb
+
+bb10: ; preds = %entry
+ %tmp11 = tail call i64 %tmp4(%C* %arg)
+ br label %bb
+}
+
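+; Same as @test1, but with the select arms swapped (null in the true arm).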
+define void @test2(%C* %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64* [[M]], [[N]]
+; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP71]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: ret void
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT: tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT: [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT: br label [[BB]]
+;
+entry:
+ %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+ %m = load i64*, i64** %tmp, align 8
+ %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+ %n = load i64*, i64** %tmp1, align 8
+ %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+ %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+ %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+ %tmp5 = icmp eq i64* %m, %n
+ %tmp6 = select i1 %tmp5, %C* null, %C* %arg
+ %tmp7 = icmp eq %C* %tmp6, null
+ br i1 %tmp7, label %bb10, label %bb8
+
+bb: ; preds = %bb10, %bb8
+ ret void
+
+bb8: ; preds = %entry
+ %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+ tail call void @bar(%struct.S* %tmp9)
+ br label %bb
+
+bb10: ; preds = %entry
+ %tmp11 = tail call i64 %tmp4(%C* %arg)
+ br label %bb
+}
+
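+; Same as @test1, but the null compare uses 'ne' and the branch successors are
+; swapped.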
+define void @test3(%C* %arg) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT: [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]]
+; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: ret void
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT: tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT: [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT: br label [[BB]]
+;
+entry:
+ %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+ %m = load i64*, i64** %tmp, align 8
+ %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+ %n = load i64*, i64** %tmp1, align 8
+ %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+ %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+ %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+ %tmp5 = icmp eq i64* %m, %n
+ %tmp6 = select i1 %tmp5, %C* %arg, %C* null
+ %tmp7 = icmp ne %C* %tmp6, null
+ br i1 %tmp7, label %bb8, label %bb10
+
+bb: ; preds = %bb10, %bb8
+ ret void
+
+bb8: ; preds = %entry
+ %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+ tail call void @bar(%struct.S* %tmp9)
+ br label %bb
+
+bb10: ; preds = %entry
+ %tmp11 = tail call i64 %tmp4(%C* %arg)
+ br label %bb
+}
+
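+; Combines the variations of @test2 and @test3: swapped select arms, 'ne'
+; compare, and swapped branch successors.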
+define void @test4(%C* %arg) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG:%.*]], i64 0, i32 0, i32 0
+; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0
+; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64* [[M]], [[N]]
+; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP71]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: ret void
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT: tail call void @bar(%struct.S* [[TMP9]])
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[M]], i64 9
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to i64 (%C*)**
+; CHECK-NEXT: [[TMP4:%.*]] = load i64 (%C*)*, i64 (%C*)** [[TMP3]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 [[TMP4]](%C* [[ARG]])
+; CHECK-NEXT: br label [[BB]]
+;
+entry:
+ %tmp = getelementptr inbounds %C, %C* %arg, i64 0, i32 0, i32 0
+ %m = load i64*, i64** %tmp, align 8
+ %tmp1 = getelementptr inbounds %C, %C* %arg, i64 1, i32 0, i32 0
+ %n = load i64*, i64** %tmp1, align 8
+ %tmp2 = getelementptr inbounds i64, i64* %m, i64 9
+ %tmp3 = bitcast i64* %tmp2 to i64 (%C*)**
+ %tmp4 = load i64 (%C*)*, i64 (%C*)** %tmp3, align 8
+ %tmp5 = icmp eq i64* %m, %n
+ %tmp6 = select i1 %tmp5, %C* null, %C* %arg
+ %tmp7 = icmp ne %C* %tmp6, null
+ br i1 %tmp7, label %bb8, label %bb10
+
+bb: ; preds = %bb10, %bb8
+ ret void
+
+bb8: ; preds = %entry
+ %tmp9 = getelementptr inbounds %C, %C* %tmp6, i64 0, i32 0
+ tail call void @bar(%struct.S* %tmp9)
+ br label %bb
+
+bb10: ; preds = %entry
+ %tmp11 = tail call i64 %tmp4(%C* %arg)
+ br label %bb
+}
+
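+; The select condition is a plain i1 argument rather than a pointer compare.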
+define void @test5(%C* %arg, i1 %arg1) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq %C* [[ARG:%.*]], null
+; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP21]], [[ARG1:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB5:%.*]], label [[BB3:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[C:%.*]], %C* [[ARG]], i64 0, i32 0
+; CHECK-NEXT: tail call void @bar(%struct.S* [[TMP4]])
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb5:
+; CHECK-NEXT: tail call void @foobar()
+; CHECK-NEXT: br label [[BB]]
+;
+entry:
+ %tmp = select i1 %arg1, %C* null, %C* %arg
+ %tmp2 = icmp ne %C* %tmp, null
+ br i1 %tmp2, label %bb3, label %bb5
+
+bb: ; preds = %bb5, %bb3
+ ret void
+
+bb3: ; preds = %entry
+ %tmp4 = getelementptr inbounds %C, %C* %tmp, i64 0, i32 0
+ tail call void @bar(%struct.S* %tmp4)
+ br label %bb
+
+bb5: ; preds = %entry
+ tail call void @foobar()
+ br label %bb
+}
+
+; Negative test. Must not trigger the select-cmp-br combine because the result
+; of the select is used in both flows following the br (the special case where
+; the conditional branch has the same target for both flows).
+define i32 @test6(i32 %arg, i1 %arg1) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[BB:%.*]], label [[BB]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP:%.*]] = select i1 [[ARG1:%.*]], i32 [[ARG:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+entry:
+ %tmp = select i1 %arg1, i32 %arg, i32 0
+ %tmp2 = icmp eq i32 %tmp, 0
+ br i1 %tmp2, label %bb, label %bb
+
+bb: ; preds = %entry, %entry
+ ret i32 %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
new file mode 100644
index 00000000000..606cdedca7a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
@@ -0,0 +1,459 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; These tests verify that the instruction combiner is able to fold a
+; cttz/ctlz followed by an icmp + select into a single cttz/ctlz with
+; the 'is_zero_undef' flag cleared.
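+; For example (a sketch with illustrative value names; test2 checks the exact
+; pattern):
+;   %ct   = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+;   %nz   = icmp ne i32 %x, 0
+;   %cond = select i1 %nz, i32 %ct, i32 32
+; becomes
+;   %cond = call i32 @llvm.ctlz.i32(i32 %x, i1 false)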
+
+define i16 @test1(i16 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %ct = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i16 %ct, i16 16
+ ret i16 %cond
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i32 %ct, i32 32
+ ret i32 %cond
+}
+
+define i64 @test3(i64 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i64 %ct, i64 64
+ ret i64 %cond
+}
+
+define i16 @test4(i16 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %ct = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+ %tobool = icmp eq i16 %x, 0
+ %cond = select i1 %tobool, i16 16, i16 %ct
+ ret i16 %cond
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %tobool = icmp eq i32 %x, 0
+ %cond = select i1 %tobool, i32 32, i32 %ct
+ ret i32 %cond
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %tobool = icmp eq i64 %x, 0
+ %cond = select i1 %tobool, i64 64, i64 %ct
+ ret i64 %cond
+}
+
+define i16 @test1b(i16 %x) {
+; CHECK-LABEL: @test1b(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %ct = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i16 %ct, i16 16
+ ret i16 %cond
+}
+
+define i32 @test2b(i32 %x) {
+; CHECK-LABEL: @test2b(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i32 %ct, i32 32
+ ret i32 %cond
+}
+
+define i64 @test3b(i64 %x) {
+; CHECK-LABEL: @test3b(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i64 %ct, i64 64
+ ret i64 %cond
+}
+
+define i16 @test4b(i16 %x) {
+; CHECK-LABEL: @test4b(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %ct = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+ %tobool = icmp eq i16 %x, 0
+ %cond = select i1 %tobool, i16 16, i16 %ct
+ ret i16 %cond
+}
+
+define i32 @test5b(i32 %x) {
+; CHECK-LABEL: @test5b(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %tobool = icmp eq i32 %x, 0
+ %cond = select i1 %tobool, i32 32, i32 %ct
+ ret i32 %cond
+}
+
+define i64 @test6b(i64 %x) {
+; CHECK-LABEL: @test6b(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %tobool = icmp eq i64 %x, 0
+ %cond = select i1 %tobool, i64 64, i64 %ct
+ ret i64 %cond
+}
+
+define i32 @test1c(i16 %x) {
+; CHECK-LABEL: @test1c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ct = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+ %cast2 = zext i16 %ct to i32
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i32 %cast2, i32 16
+ ret i32 %cond
+}
+
+define i64 @test2c(i16 %x) {
+; CHECK-LABEL: @test2c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ct = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+ %conv = zext i16 %ct to i64
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i64 %conv, i64 16
+ ret i64 %cond
+}
+
+define i64 @test3c(i32 %x) {
+; CHECK-LABEL: @test3c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %conv = zext i32 %ct to i64
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i64 %conv, i64 32
+ ret i64 %cond
+}
+
+define i32 @test4c(i16 %x) {
+; CHECK-LABEL: @test4c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ct = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+ %cast = zext i16 %ct to i32
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i32 %cast, i32 16
+ ret i32 %cond
+}
+
+define i64 @test5c(i16 %x) {
+; CHECK-LABEL: @test5c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[X:%.*]], i1 false), !range !0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ct = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+ %cast = zext i16 %ct to i64
+ %tobool = icmp ne i16 %x, 0
+ %cond = select i1 %tobool, i64 %cast, i64 16
+ ret i64 %cond
+}
+
+define i64 @test6c(i32 %x) {
+; CHECK-LABEL: @test6c(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %cast = zext i32 %ct to i64
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i64 %cast, i64 32
+ ret i64 %cond
+}
+
+define i16 @test1d(i64 %x) {
+; CHECK-LABEL: @test1d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %conv = trunc i64 %ct to i16
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i16 %conv, i16 64
+ ret i16 %cond
+}
+
+define i32 @test2d(i64 %x) {
+; CHECK-LABEL: @test2d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ct = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %ct to i32
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i32 %cast, i32 64
+ ret i32 %cond
+}
+
+define i16 @test3d(i32 %x) {
+; CHECK-LABEL: @test3d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %ct to i16
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i16 %cast, i16 32
+ ret i16 %cond
+}
+
+define i16 @test4d(i64 %x) {
+; CHECK-LABEL: @test4d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %ct to i16
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i16 %cast, i16 64
+ ret i16 %cond
+}
+
+define i32 @test5d(i64 %x) {
+; CHECK-LABEL: @test5d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X:%.*]], i1 false), !range !2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %ct = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %cast = trunc i64 %ct to i32
+ %tobool = icmp ne i64 %x, 0
+ %cond = select i1 %tobool, i32 %cast, i32 64
+ ret i32 %cond
+}
+
+define i16 @test6d(i32 %x) {
+; CHECK-LABEL: @test6d(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %cast = trunc i32 %ct to i16
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i16 %cast, i16 32
+ ret i16 %cond
+}
+
+define i64 @select_bug1(i32 %x) {
+; CHECK-LABEL: @select_bug1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ %conv = zext i32 %ct to i64
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i64 %conv, i64 32
+ ret i64 %cond
+}
+
+define i16 @select_bug2(i32 %x) {
+; CHECK-LABEL: @select_bug2(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ %conv = trunc i32 %ct to i16
+ %tobool = icmp ne i32 %x, 0
+ %cond = select i1 %tobool, i16 %conv, i16 32
+ ret i16 %cond
+}
+
+define i128 @test7(i128 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i128 @llvm.ctlz.i128(i128 [[X:%.*]], i1 false), !range !3
+; CHECK-NEXT: ret i128 [[TMP1]]
+;
+ %ct = tail call i128 @llvm.ctlz.i128(i128 %x, i1 true)
+ %tobool = icmp ne i128 %x, 0
+ %cond = select i1 %tobool, i128 %ct, i128 128
+ ret i128 %cond
+}
+
+define i128 @test8(i128 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i128 @llvm.cttz.i128(i128 [[X:%.*]], i1 false), !range !3
+; CHECK-NEXT: ret i128 [[TMP1]]
+;
+ %ct = tail call i128 @llvm.cttz.i128(i128 %x, i1 true)
+ %tobool = icmp ne i128 %x, 0
+ %cond = select i1 %tobool, i128 %ct, i128 128
+ ret i128 %cond
+}
+
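+; If the select constant is not the bit width, the fold to a single intrinsic
+; does not apply. In @test_ctlz_not_bw and @test_cttz_not_bw the combiner can
+; still set 'is_zero_undef', since the count is only used when %x is known to
+; be non-zero; the multi-use variants below are left with the flag cleared.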
+define i32 @test_ctlz_not_bw(i32 %x) {
+; CHECK-LABEL: @test_ctlz_not_bw(
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), !range !1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ne i32 %x, 0
+ %res = select i1 %cmp, i32 %ct, i32 123
+ ret i32 %res
+}
+
+define i32 @test_ctlz_not_bw_multiuse(i32 %x) {
+; CHECK-LABEL: @test_ctlz_not_bw_multiuse(
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[SEL]], [[CT]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ct = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %cmp = icmp ne i32 %x, 0
+ %sel = select i1 %cmp, i32 %ct, i32 123
+ %res = or i32 %sel, %ct
+ ret i32 %res
+}
+
+define i32 @test_cttz_not_bw(i32 %x) {
+; CHECK-LABEL: @test_cttz_not_bw(
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ %cmp = icmp ne i32 %x, 0
+ %res = select i1 %cmp, i32 %ct, i32 123
+ ret i32 %res
+}
+
+define i32 @test_cttz_not_bw_multiuse(i32 %x) {
+; CHECK-LABEL: @test_cttz_not_bw_multiuse(
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[SEL]], [[CT]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %ct = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ %cmp = icmp ne i32 %x, 0
+ %sel = select i1 %cmp, i32 %ct, i32 123
+ %res = or i32 %sel, %ct
+ ret i32 %res
+}
+
+define <2 x i32> @test_ctlz_bw_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_ctlz_bw_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %ct = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i32> %ct, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %res
+}
+
+define <2 x i32> @test_ctlz_not_bw_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_ctlz_not_bw_vec(
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 true)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
+; CHECK-NEXT: ret <2 x i32> [[RES]]
+;
+ %ct = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i32> %ct, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %res
+}
+
+define <2 x i32> @test_cttz_bw_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_cttz_bw_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %ct = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i32> %ct, <2 x i32> <i32 32, i32 32>
+ ret <2 x i32> %res
+}
+
+define <2 x i32> @test_cttz_not_bw_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_cttz_not_bw_vec(
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 true)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
+; CHECK-NEXT: ret <2 x i32> [[RES]]
+;
+ %ct = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
+ %cmp = icmp ne <2 x i32> %x, zeroinitializer
+ %res = select <2 x i1> %cmp, <2 x i32> %ct, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %res
+}
+
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i128 @llvm.ctlz.i128(i128, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i128 @llvm.cttz.i128(i128, i1)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
diff --git a/llvm/test/Transforms/InstCombine/select-cmpxchg.ll b/llvm/test/Transforms/InstCombine/select-cmpxchg.ll
new file mode 100644
index 00000000000..d14fcad861f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-cmpxchg.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
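+; On the success path of a cmpxchg the loaded value is known to equal the
+; compare operand, so selecting between the two on the success flag simplifies
+; to a single value: the extracted value in @cmpxchg_0, the compare operand in
+; @cmpxchg_1 and @cmpxchg_2.
+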
+define i64 @cmpxchg_0(i64* %ptr, i64 %compare, i64 %new_value) {
+; CHECK-LABEL: @cmpxchg_0(
+; CHECK-NEXT: %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
+; CHECK-NEXT: %tmp2 = extractvalue { i64, i1 } %tmp0, 0
+; CHECK-NEXT: ret i64 %tmp2
+;
+ %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
+ %tmp1 = extractvalue { i64, i1 } %tmp0, 1
+ %tmp2 = extractvalue { i64, i1 } %tmp0, 0
+ %tmp3 = select i1 %tmp1, i64 %compare, i64 %tmp2
+ ret i64 %tmp3
+}
+
+define i64 @cmpxchg_1(i64* %ptr, i64 %compare, i64 %new_value) {
+; CHECK-LABEL: @cmpxchg_1(
+; CHECK-NEXT: %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
+; CHECK-NEXT: ret i64 %compare
+;
+ %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value seq_cst seq_cst
+ %tmp1 = extractvalue { i64, i1 } %tmp0, 1
+ %tmp2 = extractvalue { i64, i1 } %tmp0, 0
+ %tmp3 = select i1 %tmp1, i64 %tmp2, i64 %compare
+ ret i64 %tmp3
+}
+
+define i64 @cmpxchg_2(i64* %ptr, i64 %compare, i64 %new_value) {
+; CHECK-LABEL: @cmpxchg_2(
+; CHECK-NEXT: %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value acq_rel monotonic
+; CHECK-NEXT: ret i64 %compare
+;
+ %tmp0 = cmpxchg i64* %ptr, i64 %compare, i64 %new_value acq_rel monotonic
+ %tmp1 = extractvalue { i64, i1 } %tmp0, 1
+ %tmp2 = extractvalue { i64, i1 } %tmp0, 0
+ %tmp3 = select i1 %tmp1, i64 %compare, i64 %tmp2
+ %tmp4 = select i1 %tmp1, i64 %tmp3, i64 %compare
+ ret i64 %tmp4
+}
diff --git a/llvm/test/Transforms/InstCombine/select-crash-noverify.ll b/llvm/test/Transforms/InstCombine/select-crash-noverify.ll
new file mode 100644
index 00000000000..4a366aa8fb8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-crash-noverify.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -disable-verify -instcombine -S | opt -S | FileCheck %s
+; Formerly crashed, PR8490.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i1 %bool, i32 %a) {
+entry:
+ %cond = or i1 %bool, true
+ br i1 %cond, label %return, label %xpto
+
+; The block below is technically reachable in the CFG, so its self-referential
+; selects are malformed IR, but such IR may appear as a result of constant
+; propagation.
+xpto:
+ %select = select i1 %bool, i32 %a, i32 %select
+ %select2 = select i1 %bool, i32 %select2, i32 %a
+ %sum = add i32 %select, %select2
+ ret i32 %sum
+
+return:
+ ret i32 7
+}
diff --git a/llvm/test/Transforms/InstCombine/select-crash.ll b/llvm/test/Transforms/InstCombine/select-crash.ll
new file mode 100644
index 00000000000..41b69d25104
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-crash.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Formerly crashed, PR8490.
+
+define fastcc double @gimp_operation_color_balance_map(float %value, double %highlights) nounwind readnone inlinehint {
+entry:
+; CHECK: gimp_operation_color_balance_map
+; CHECK: fsub double -0.000000
+ %conv = fpext float %value to double
+ %div = fdiv double %conv, 1.600000e+01
+ %add = fadd double %div, 1.000000e+00
+ %div1 = fdiv double 1.000000e+00, %add
+ %sub = fsub double 1.075000e+00, %div1
+ %sub24 = fsub double 1.000000e+00, %sub
+ %add26 = fadd double %sub, 1.000000e+00
+ %cmp86 = fcmp ogt double %highlights, 0.000000e+00
+ %cond90 = select i1 %cmp86, double %sub24, double %add26
+ %mul91 = fmul double %highlights, %cond90
+ %add94 = fadd double %mul91, %mul91
+ ret double %add94
+}
+
+; PR10180: same crash, but with vectors
+define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: @foo(
+; CHECK: fsub <4 x float>
+; CHECK: select
+; CHECK: fadd <4 x float>
+ %a = fadd <4 x float> %x, %y
+ %sub = fsub <4 x float> %x, %z
+ %sel = select i1 %b, <4 x float> %a, <4 x float> %sub
+ ret <4 x float> %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll
new file mode 100644
index 00000000000..79d0b47f97d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll
@@ -0,0 +1,146 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @v4float_user(<4 x float>) #0
+
+define float @extract_one_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %c, 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
+; CHECK-NEXT: ret float [[EXTRACT]]
+;
+ %cmp = icmp ne i32 %c, 0
+ %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+ %extract = extractelement <4 x float> %sel, i32 2
+ ret float %extract
+}
+
+; Multiple extractelements
+define <2 x float> @extract_two_select(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_two_select(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %c, 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> [[BUILD2]]
+;
+ %cmp = icmp ne i32 %c, 0
+ %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+ %extract1 = extractelement <4 x float> %sel, i32 1
+ %extract2 = extractelement <4 x float> %sel, i32 2
+ %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+ %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+ ret <2 x float> %build2
+}
+
+; Select has an extra non-extractelement user, don't change it
+define float @extract_one_select_user(<4 x float> %a, <4 x float> %b, i32 %c) #0 {
+; CHECK-LABEL: @extract_one_select_user(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %c, 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
+; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
+; CHECK-NEXT: ret float [[EXTRACT]]
+;
+ %cmp = icmp ne i32 %c, 0
+ %sel = select i1 %cmp, <4 x float> %a, <4 x float> %b
+ %extract = extractelement <4 x float> %sel, i32 2
+ call void @v4float_user(<4 x float> %sel)
+ ret float %extract
+}
+
+define float @extract_one_vselect_user(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect_user(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SEL]], i32 2
+; CHECK-NEXT: call void @v4float_user(<4 x float> [[SEL]])
+; CHECK-NEXT: ret float [[EXTRACT]]
+;
+ %cmp = icmp ne <4 x i32> %c, zeroinitializer
+ %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+ %extract = extractelement <4 x float> %sel, i32 2
+ call void @v4float_user(<4 x float> %sel)
+ ret float %extract
+}
+
+; Do not convert the vector select into a scalar select. That would increase
+; the instruction count and potentially obfuscate a vector min/max idiom.
+
+define float @extract_one_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_one_vselect(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: [[SELECT:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[SELECT]], i32 0
+; CHECK-NEXT: ret float [[EXTRACT]]
+;
+ %cmp = icmp ne <4 x i32> %c, zeroinitializer
+ %select = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+ %extract = extractelement <4 x float> %select, i32 0
+ ret float %extract
+}
+
+; Multiple extractelements from a vector select
+define <2 x float> @extract_two_vselect(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @extract_two_vselect(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> %c, zeroinitializer
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[BUILD2:%.*]] = shufflevector <4 x float> [[SEL]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x float> [[BUILD2]]
+;
+ %cmp = icmp ne <4 x i32> %c, zeroinitializer
+ %sel = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
+ %extract1 = extractelement <4 x float> %sel, i32 1
+ %extract2 = extractelement <4 x float> %sel, i32 2
+ %build1 = insertelement <2 x float> undef, float %extract1, i32 0
+ %build2 = insertelement <2 x float> %build1, float %extract2, i32 1
+ ret <2 x float> %build2
+}
+
+; The vector selects are not decomposed into scalar selects because that would increase
+; the instruction count. Extract+insert is converted to non-lane-crossing shuffles.
+; Test multiple extractelements
+define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_vector_select(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> %c, i32 0
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: [[A_SINK:%.*]] = select i1 [[TOBOOL]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 1
+; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
+; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> %c, i32 3
+; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[A_SINK3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x float> [[TMP6]]
+;
+entry:
+ %0 = extractelement <4 x i32> %c, i32 0
+ %tobool = icmp ne i32 %0, 0
+ %a.sink = select i1 %tobool, <4 x float> %a, <4 x float> %b
+ %1 = extractelement <4 x float> %a.sink, i32 0
+ %2 = insertelement <4 x float> undef, float %1, i32 0
+ %3 = extractelement <4 x i32> %c, i32 1
+ %tobool1 = icmp ne i32 %3, 0
+ %a.sink1 = select i1 %tobool1, <4 x float> %a, <4 x float> %b
+ %4 = extractelement <4 x float> %a.sink1, i32 1
+ %5 = insertelement <4 x float> %2, float %4, i32 1
+ %6 = extractelement <4 x i32> %c, i32 2
+ %tobool6 = icmp ne i32 %6, 0
+ %a.sink2 = select i1 %tobool6, <4 x float> %a, <4 x float> %b
+ %7 = extractelement <4 x float> %a.sink2, i32 2
+ %8 = insertelement <4 x float> %5, float %7, i32 2
+ %9 = extractelement <4 x i32> %c, i32 3
+ %tobool11 = icmp ne i32 %9, 0
+ %a.sink3 = select i1 %tobool11, <4 x float> %a, <4 x float> %b
+ %10 = extractelement <4 x float> %a.sink3, i32 3
+ %11 = insertelement <4 x float> %8, float %10, i32 3
+ ret <4 x float> %11
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/InstCombine/select-gep.ll b/llvm/test/Transforms/InstCombine/select-gep.ll
new file mode 100644
index 00000000000..72166b69e9f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-gep.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
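+; A select of two GEPs that differ in a single operand can be folded into one
+; GEP of a select, e.g. (a sketch; test1a checks the exact form):
+;   select %cmp, (gep i32, i32* %p, i64 4), (gep i32, i32* %q, i64 4)
+;     -> gep i32, i32* (select %cmp, i32* %p, i32* %q), i64 4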
+define i32* @test1a(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32* [[P:%.*]], [[Q:%.*]]
+; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i32* [[P]], i32* [[Q]]
+; CHECK-NEXT: [[SELECT:%.*]] = getelementptr i32, i32* [[SELECT_V]], i64 4
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr i32, i32* %p, i64 4
+ %gep2 = getelementptr i32, i32* %q, i64 4
+ %cmp = icmp ugt i32* %p, %q
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test1b(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1b(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32* [[P:%.*]], [[Q:%.*]]
+; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i32* [[P]], i32* [[Q]]
+; CHECK-NEXT: [[SELECT:%.*]] = getelementptr i32, i32* [[SELECT_V]], i64 4
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 4
+ %gep2 = getelementptr i32, i32* %q, i64 4
+ %cmp = icmp ugt i32* %p, %q
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test1c(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1c(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32* [[P:%.*]], [[Q:%.*]]
+; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i32* [[P]], i32* [[Q]]
+; CHECK-NEXT: [[SELECT:%.*]] = getelementptr i32, i32* [[SELECT_V]], i64 4
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr i32, i32* %p, i64 4
+ %gep2 = getelementptr inbounds i32, i32* %q, i64 4
+ %cmp = icmp ugt i32* %p, %q
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test1d(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1d(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32* [[P:%.*]], [[Q:%.*]]
+; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i32* [[P]], i32* [[Q]]
+; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, i32* [[SELECT_V]], i64 4
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 4
+ %gep2 = getelementptr inbounds i32, i32* %q, i64 4
+ %cmp = icmp ugt i32* %p, %q
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test2(i32* %p, i64 %x, i64 %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT_V:%.*]] = select i1 [[CMP]], i64 [[X]], i64 [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[SELECT_V]]
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+ %gep2 = getelementptr inbounds i32, i32* %p, i64 %y
+ %cmp = icmp ugt i64 %x, %y
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+; Three (or more) operand GEPs are currently expected not to be optimised,
+; though they could be in principle.
+
+define i32* @test3a([4 x i32]* %p, i64 %x, i64 %y) {
+; CHECK-LABEL: @test3a(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[P:%.*]], i64 2, i64 [[X:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[P]], i64 2, i64 [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32* [[GEP1]], i32* [[GEP2]]
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %x
+ %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %y
+ %cmp = icmp ugt i64 %x, %y
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test3b([4 x i32]* %p, i32* %q, i64 %x, i64 %y) {
+; CHECK-LABEL: @test3b(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[P:%.*]], i64 [[X:%.*]], i64 2
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 [[X]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32* [[GEP1]], i32* [[GEP2]]
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 %x, i64 2
+ %gep2 = getelementptr inbounds i32, i32* %q, i64 %x
+ %cmp = icmp ugt i64 %x, %y
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+define i32* @test3c(i32* %p, [4 x i32]* %q, i64 %x, i64 %y) {
+; CHECK-LABEL: @test3c(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[Q:%.*]], i64 [[X]], i64 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32* [[GEP1]], i32* [[GEP2]]
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+ %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %q, i64 %x, i64 2
+ %cmp = icmp ugt i64 %x, %y
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+; Shouldn't be optimised as it would mean introducing an extra select
+
+define i32* @test4(i32* %p, i32* %q, i64 %x, i64 %y) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[X]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32* [[GEP1]], i32* [[GEP2]]
+; CHECK-NEXT: ret i32* [[SELECT]]
+;
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+ %gep2 = getelementptr inbounds i32, i32* %q, i64 %y
+ %cmp = icmp ugt i64 %x, %y
+ %select = select i1 %cmp, i32* %gep1, i32* %gep2
+ ret i32* %select
+}
+
+; We cannot create a select with a vector condition but scalar operands.
+
+define <2 x i64*> @test5(i64* %p1, i64* %p2, <2 x i64> %idx, <2 x i1> %cc) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, i64* %p1, <2 x i64> %idx
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i64, i64* %p2, <2 x i64> %idx
+; CHECK-NEXT: [[SELECT:%.*]] = select <2 x i1> %cc, <2 x i64*> [[GEP1]], <2 x i64*> [[GEP2]]
+; CHECK-NEXT: ret <2 x i64*> [[SELECT]]
+;
+ %gep1 = getelementptr i64, i64* %p1, <2 x i64> %idx
+ %gep2 = getelementptr i64, i64* %p2, <2 x i64> %idx
+ %select = select <2 x i1> %cc, <2 x i64*> %gep1, <2 x i64*> %gep2
+ ret <2 x i64*> %select
+}
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
new file mode 100644
index 00000000000..306f1384533
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
@@ -0,0 +1,620 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
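+; A select between a power-of-two constant and zero, guarded by a test of that
+; same bit, reduces to the masking 'and' itself; e.g. (a sketch, matching
+; test5 below):
+;   (X & 32) != 0 ? 32 : 0  ->  X & 32
+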
+;; ((X & 32) ? 32 : 0)
+
+define i41 @test5(i41 %X) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[Y:%.*]] = and i41 [[X:%.*]], 32
+; CHECK-NEXT: ret i41 [[Y]]
+;
+ %Y = and i41 %X, 32
+ %t = icmp ne i41 %Y, 0
+ %V = select i1 %t, i41 32, i41 0
+ ret i41 %V
+}
+
+;; ((X & 64) ? 64 : 0)
+
+define i1023 @test6(i1023 %X) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[Y:%.*]] = and i1023 [[X:%.*]], 64
+; CHECK-NEXT: ret i1023 [[Y]]
+;
+ %Y = and i1023 %X, 64
+ %t = icmp ne i1023 %Y, 0
+ %V = select i1 %t, i1023 64, i1023 0
+ ret i1023 %V
+}
+
+define i32 @test35(i32 %x) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sge i32 %x, 0
+ %cond = select i1 %cmp, i32 60, i32 100
+ ret i32 %cond
+}
+
+define <2 x i32> @test35vec(<2 x i32> %x) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp sge <2 x i32> %x, <i32 0, i32 0>
+ %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+ ret <2 x i32> %cond
+}
+
+; Make sure we can still perform this optimization with a truncate present
+define i32 @test35_with_trunc(i64 %x) {
+; CHECK-LABEL: @test35_with_trunc(
+; CHECK-NEXT: [[X1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X1]], -1
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %x1 = trunc i64 %x to i32
+ %cmp = icmp sge i32 %x1, 0
+ %cond = select i1 %cmp, i32 60, i32 100
+ ret i32 %cond
+}
+
+define i32 @test36(i32 %x) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 60, i32 100
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %cond = select i1 %cmp, i32 60, i32 100
+ ret i32 %cond
+}
+
+define <2 x i32> @test36vec(<2 x i32> %x) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp slt <2 x i32> %x, <i32 0, i32 0>
+ %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+ ret <2 x i32> %cond
+}
+
+define i32 @test37(i32 %x) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 1, i32 -1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %cond = select i1 %cmp, i32 1, i32 -1
+ ret i32 %cond
+}
+
+define <2 x i32> @test37vec(<2 x i32> %x) {
+; CHECK-LABEL: @test37vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[COND]]
+;
+ %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+ %cond = select <2 x i1> %cmp, <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+ ret <2 x i32> %cond
+}
+
+define i32 @test65(i64 %x) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i64 %x, 16
+ %2 = icmp ne i64 %1, 0
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test65vec(<2 x i64> %x) {
+; CHECK-LABEL: @test65vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 16, i64 16>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and <2 x i64> %x, <i64 16, i64 16>
+ %2 = icmp ne <2 x i64> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test66(i64 %x) {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i64 %x, 4294967296
+ %2 = icmp ne i64 %1, 0
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test66vec(<2 x i64> %x) {
+; CHECK-LABEL: @test66vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and <2 x i64> %x, <i64 4294967296, i64 4294967296>
+ %2 = icmp ne <2 x i64> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+; Make sure we don't try to optimize a scalar 'and' with a vector select.
+define <2 x i32> @test66vec_scalar_and(i64 %x) {
+; CHECK-LABEL: @test66vec_scalar_and(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and i64 %x, 4294967296
+ %2 = icmp ne i64 %1, 0
+ %3 = select i1 %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test67(i16 %x) {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i16 %x, 4
+ %2 = icmp ne i16 %1, 0
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test67vec(<2 x i16> %x) {
+; CHECK-LABEL: @test67vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 4, i16 4>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and <2 x i16> %x, <i16 4, i16 4>
+ %2 = icmp ne <2 x i16> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test71(i32 %x) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 42, i32 40
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 128
+ %2 = icmp ne i32 %1, 0
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test71vec(<2 x i32> %x) {
+; CHECK-LABEL: @test71vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and <2 x i32> %x, <i32 128, i32 128>
+ %2 = icmp ne <2 x i32> %1, <i32 0, i32 0>
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test72(i32 %x) {
+; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 128
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test72vec(<2 x i32> %x) {
+; CHECK-LABEL: @test72vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and <2 x i32> %x, <i32 128, i32 128>
+ %2 = icmp eq <2 x i32> %1, <i32 0, i32 0>
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test73(i32 %x) {
+; CHECK-LABEL: @test73(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 40, i32 42
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = trunc i32 %x to i8
+ %2 = icmp sgt i8 %1, -1
+ %3 = select i1 %2, i32 40, i32 42
+ ret i32 %3
+}
+
+define <2 x i32> @test73vec(<2 x i32> %x) {
+; CHECK-LABEL: @test73vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = trunc <2 x i32> %x to <2 x i8>
+ %2 = icmp sgt <2 x i8> %1, <i8 -1, i8 -1>
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+define i32 @test74(i32 %x) {
+; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 40, i32 42
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %1 = icmp sgt i32 %x, -1
+ %2 = select i1 %1, i32 40, i32 42
+ ret i32 %2
+}
+
+define <2 x i32> @test74vec(<2 x i32> %x) {
+; CHECK-LABEL: @test74vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %1 = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+ %2 = select <2 x i1> %1, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %2
+}
+
+;; Code sequence for (X & 16) ? 16 : 0
+define i32 @test15a(i32 %X) {
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 16
+; CHECK-NEXT: ret i32 [[T1]]
+;
+ %t1 = and i32 %X, 16
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 0, i32 16
+ ret i32 %t3
+}
+
+;; Code sequence for (X & 32) ? 0 : 32
+define i32 @test15b(i32 %X) {
+; CHECK-LABEL: @test15b(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 32
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T1]], 32
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %X, 32
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 32, i32 0
+ ret i32 %t3
+}
+
+;; Alternate code sequence for (X & 16) ? 16 : 0
+define i32 @test15c(i32 %X) {
+; CHECK-LABEL: @test15c(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 16
+; CHECK-NEXT: ret i32 [[T1]]
+;
+ %t1 = and i32 %X, 16
+ %t2 = icmp eq i32 %t1, 16
+ %t3 = select i1 %t2, i32 16, i32 0
+ ret i32 %t3
+}
+
+;; Alternate code sequence for (X & 16) ? 16 : 0
+define i32 @test15d(i32 %X) {
+; CHECK-LABEL: @test15d(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 16
+; CHECK-NEXT: ret i32 [[T1]]
+;
+ %t1 = and i32 %X, 16
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 16, i32 0
+ ret i32 %t3
+}
+
+;; (a & 128) ? 256 : 0
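+;; Bit 128 of 'a' selecting 256 is that same bit moved up one position, hence
+;; the expected shl-by-1 followed by an and-with-256.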
+define i32 @test15e(i32 %X) {
+; CHECK-LABEL: @test15e(
+; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T1]], 256
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %X, 128
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 256, i32 0
+ ret i32 %t3
+}
+
+;; (a & 128) ? 0 : 256
+define i32 @test15f(i32 %X) {
+; CHECK-LABEL: @test15f(
+; CHECK-NEXT: [[T1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T1]], 256
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 256
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %t1 = and i32 %X, 128
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 0, i32 256
+ ret i32 %t3
+}
+
+;; (a & 8) ? -1 : -9
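+;; -9 has every bit set except bit 3, so or'ing the tested bit back in yields
+;; -1 when the bit is set and leaves -9 when it is clear.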
+define i32 @test15g(i32 %X) {
+; CHECK-LABEL: @test15g(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -9
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %X, 8
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 -1, i32 -9
+ ret i32 %t3
+}
+
+;; (a & 8) ? -9 : -1
+define i32 @test15h(i32 %X) {
+; CHECK-LABEL: @test15h(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T1]], -1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %X, 8
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 -9, i32 -1
+ ret i32 %t3
+}
+
+;; (a & 2) ? 577 : 1089
+define i32 @test15i(i32 %X) {
+; CHECK-LABEL: @test15i(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 2
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i32 1089, i32 577
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t1 = and i32 %X, 2
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 577, i32 1089
+ ret i32 %t3
+}
+
+;; (a & 2) ? 1089 : 577
+define i32 @test15j(i32 %X) {
+; CHECK-LABEL: @test15j(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 2
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i32 577, i32 1089
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t1 = and i32 %X, 2
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 1089, i32 577
+ ret i32 %t3
+}
+
+declare void @use1(i1)
+
+; (X & 8) == 0 ? -3 : -11 --> (X & 8) ^ -3
+; Extra cmp use ensures that cmp predicate canonicalization is thwarted.
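+; A quick check of the constants used in this group of tests: with bit 3 set,
+; 8 ^ -3 == -11 and 8 | -11 == -3; with it clear, 0 ^ -3 == -3 and 0 | -11 == -11,
+; so the xor/or forms reproduce both select arms.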
+
+define i32 @clear_to_set(i32 %x) {
+; CHECK-LABEL: @clear_to_set(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T1]], -3
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %x, 8
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 -3, i32 -11
+ call void @use1(i1 %t2)
+ ret i32 %t3
+}
+
+; (X & 8) == 0 ? -11 : -3 --> (X & 8) | -11
+; Extra cmp use ensures that cmp predicate canonicalization is thwarted.
+
+define i32 @clear_to_clear(i32 %x) {
+; CHECK-LABEL: @clear_to_clear(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T1]], -11
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %x, 8
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 -11, i32 -3
+ call void @use1(i1 %t2)
+ ret i32 %t3
+}
+
+; (X & 8) != 0 ? -3 : -11 --> (X & 8) | -11
+; Extra cmp use ensures that cmp predicate canonicalization is thwarted.
+
+define i32 @set_to_set(i32 %x) {
+; CHECK-LABEL: @set_to_set(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[T1]], -11
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %x, 8
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 -3, i32 -11
+ call void @use1(i1 %t2)
+ ret i32 %t3
+}
+
+; (X & 8) != 0 ? -11 : -3 --> (X & 8) ^ -3
+; Extra cmp use ensures that cmp predicate canonicalization is thwarted.
+
+define i32 @set_to_clear(i32 %x) {
+; CHECK-LABEL: @set_to_clear(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[T1]], -3
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = and i32 %x, 8
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 -11, i32 -3
+ call void @use1(i1 %t2)
+ ret i32 %t3
+}
+
+; (X & 128) == 0 ? 131 : 3 --> (X & 128) ^ 131
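+; Note on the i8 constants below: 'icmp sgt i8 %x, -1' is a sign-bit test, i.e.
+; (X & 128) == 0, and i8 131 is printed as -125 in the checks (131 - 256 == -125).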
+
+define i8 @clear_to_set_decomposebittest(i8 %x) {
+; CHECK-LABEL: @clear_to_set_decomposebittest(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -125
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %t2 = icmp sgt i8 %x, -1
+ %t3 = select i1 %t2, i8 131, i8 3
+ ret i8 %t3
+}
+
+; (X & 128) == 0 ? 3 : 131 --> (X & 128) | 3
+
+define i8 @clear_to_clear_decomposebittest(i8 %x) {
+; CHECK-LABEL: @clear_to_clear_decomposebittest(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 3
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %t2 = icmp sgt i8 %x, -1
+ %t3 = select i1 %t2, i8 3, i8 131
+ ret i8 %t3
+}
+
+; (X & 128) != 0 ? 131 : 3 --> (X & 128) | 3
+
+define i8 @set_to_set_decomposebittest(i8 %x) {
+; CHECK-LABEL: @set_to_set_decomposebittest(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], 3
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %t2 = icmp slt i8 %x, 0
+ %t3 = select i1 %t2, i8 131, i8 3
+ ret i8 %t3
+}
+
+; (X & 128) != 0 ? 3 : 131 --> (X & 128) ^ 131
+
+define i8 @set_to_clear_decomposebittest(i8 %x) {
+; CHECK-LABEL: @set_to_clear_decomposebittest(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -125
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %t2 = icmp slt i8 %x, 0
+ %t3 = select i1 %t2, i8 3, i8 131
+ ret i8 %t3
+}
+
+; (X & 128) == 0 ? 131 : 3 --> (X & 128) ^ 131
+; Extra cmp use to verify that we are not creating extra instructions.
+
+define i8 @clear_to_set_decomposebittest_extra_use(i8 %x) {
+; CHECK-LABEL: @clear_to_set_decomposebittest_extra_use(
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[X:%.*]], -1
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 -125, i8 3
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i8 [[T3]]
+;
+ %t2 = icmp sgt i8 %x, -1
+ %t3 = select i1 %t2, i8 131, i8 3
+ call void @use1(i1 %t2)
+ ret i8 %t3
+}
+
+; (X & 128) == 0 ? 3 : 131 --> (X & 128) | 3
+; Extra cmp use to verify that we are not creating extra instructions.
+
+define i8 @clear_to_clear_decomposebittest_extra_use(i8 %x) {
+; CHECK-LABEL: @clear_to_clear_decomposebittest_extra_use(
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[X:%.*]], -1
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 3, i8 -125
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i8 [[T3]]
+;
+ %t2 = icmp sgt i8 %x, -1
+ %t3 = select i1 %t2, i8 3, i8 131
+ call void @use1(i1 %t2)
+ ret i8 %t3
+}
+
+; (X & 128) != 0 ? 131 : 3 --> (X & 128) | 3
+; Extra cmp use to verify that we are not creating extra instructions.
+
+define i8 @set_to_set_decomposebittest_extra_use(i8 %x) {
+; CHECK-LABEL: @set_to_set_decomposebittest_extra_use(
+; CHECK-NEXT: [[T2:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 -125, i8 3
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i8 [[T3]]
+;
+ %t2 = icmp slt i8 %x, 0
+ %t3 = select i1 %t2, i8 131, i8 3
+ call void @use1(i1 %t2)
+ ret i8 %t3
+}
+
+; (X & 128) != 0 ? 3 : 131 --> (X & 128) ^ 131
+; Extra cmp use to verify that we are not creating extra instructions.
+
+define i8 @set_to_clear_decomposebittest_extra_use(i8 %x) {
+; CHECK-LABEL: @set_to_clear_decomposebittest_extra_use(
+; CHECK-NEXT: [[T2:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T2]], i8 3, i8 -125
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: ret i8 [[T3]]
+;
+ %t2 = icmp slt i8 %x, 0
+ %t3 = select i1 %t2, i8 3, i8 131
+ call void @use1(i1 %t2)
+ ret i8 %t3
+}
+
diff --git a/llvm/test/Transforms/InstCombine/select-load-call.ll b/llvm/test/Transforms/InstCombine/select-load-call.ll
new file mode 100644
index 00000000000..ad0ef4f85eb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-load-call.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | grep "ret i32 1"
+
+declare void @test2()
+
+define i32 @test(i1 %cond, i32 *%P) {
+ %A = alloca i32
+ store i32 1, i32* %P
+ store i32 1, i32* %A
+
+ call void @test2() readonly
+
+ %P2 = select i1 %cond, i32 *%P, i32* %A
+ %V = load i32, i32* %P2
+ ret i32 %V
+}
diff --git a/llvm/test/Transforms/InstCombine/select-obo-peo-ops.ll b/llvm/test/Transforms/InstCombine/select-obo-peo-ops.ll
new file mode 100644
index 00000000000..c57904ab94b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-obo-peo-ops.ll
@@ -0,0 +1,1143 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
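+; The function names encode which wrap/exactness flags (nuw/nsw/exact) are on the
+; input shl/lshr/ashr/add/sub/mul and which of them remain provably valid once
+; instcombine rewrites the select; the checks pin down the flags that survive in
+; each case.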
+
+define i64 @test_shl_nuw_nsw__all_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw_nsw__all_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl nuw nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nuw__all_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw__all_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl nuw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nsw__all_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nsw__all_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl__all_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl__all_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 15
+ %2 = shl i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nuw_nsw__nuw_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw_nsw__nuw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1073741822
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 1073741822
+ %2 = shl nuw nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nuw__nuw_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw__nuw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1073741822
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 1073741822
+ %2 = shl nuw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nsw__nuw_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nsw__nuw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1073741822
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 1073741822
+ %2 = shl nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl__nuw_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl__nuw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1073741822
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 1073741822
+ %2 = shl i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i32 @test_shl_nuw_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_shl_nuw_nsw__nsw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], -83886079
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 -335544316, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %1 = or i32 %x, -83886080
+ %2 = icmp eq i32 %1, -83886079
+ %3 = shl nuw nsw i32 %1, 2
+ %4 = select i1 %2, i32 -335544316, i32 %3
+ %5 = mul i32 %4, %1
+ %6 = mul i32 %5, %3
+ ret i32 %6
+}
+
+define i32 @test_shl_nuw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_shl_nuw__nsw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], -83886079
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 -335544316, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %1 = or i32 %x, -83886080
+ %2 = icmp eq i32 %1, -83886079
+ %3 = shl nuw i32 %1, 2
+ %4 = select i1 %2, i32 -335544316, i32 %3
+ %5 = mul i32 %4, %1
+ %6 = mul i32 %5, %3
+ ret i32 %6
+}
+
+define i32 @test_shl_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_shl_nsw__nsw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], -83886079
+; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 -335544316, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %1 = or i32 %x, -83886080
+ %2 = icmp eq i32 %1, -83886079
+ %3 = shl nsw i32 %1, 2
+ %4 = select i1 %2, i32 -335544316, i32 %3
+ %5 = mul i32 %4, %1
+ %6 = mul i32 %5, %3
+ ret i32 %6
+}
+
+define i32 @test_shl__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_shl__nsw_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], -83886079
+; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 -335544316, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %1 = or i32 %x, -83886080
+ %2 = icmp eq i32 %1, -83886079
+ %3 = shl i32 %1, 2
+ %4 = select i1 %2, i32 -335544316, i32 %3
+ %5 = mul i32 %4, %1
+ %6 = mul i32 %5, %3
+ ret i32 %6
+}
+
+
+define i64 @test_shl_nuw_nsw__none_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw_nsw__none_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 4294967294
+ %2 = shl nuw nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nuw__none_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nuw__none_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 4294967294
+ %2 = shl nuw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl_nsw__none_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl_nsw__none_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 4294967294
+ %2 = shl nsw i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_shl__none_are_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_shl__none_are_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -8
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, 4294967294
+ %2 = shl i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_lshr_exact__exact_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_lshr_exact__exact_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 60
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 60
+ %2 = lshr exact i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_lshr__exact_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_lshr__exact_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 60
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 60
+ %2 = lshr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_lshr_exact__exact_is_unsafe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_lshr_exact__exact_is_unsafe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, 63
+ %2 = lshr exact i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_lshr__exact_is_unsafe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_lshr__exact_is_unsafe(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 15
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, 63
+ %2 = lshr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_ashr_exact__exact_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_ashr_exact__exact_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483588
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, -2147483588
+ %2 = ashr exact i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_ashr__exact_is_safe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_ashr__exact_is_safe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483588
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, -2147483588
+ %2 = ashr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_ashr_exact__exact_is_unsafe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_ashr_exact__exact_is_unsafe(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483585
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %1 = and i32 %x, -2147483585
+ %2 = ashr exact i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i64 @test_ashr__exact_is_unsafe(i32 %x, i64 %y) {
+; CHECK-LABEL: @test_ashr__exact_is_unsafe(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -536870897
+; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+ %1 = and i32 %x, -2147483585
+ %2 = ashr i32 %1, 2
+ %3 = zext i32 %2 to i64
+ %4 = icmp eq i32 %1, 0
+ %5 = ashr i64 %y, %3
+ %6 = select i1 %4, i64 %y, i64 %5
+ ret i64 %6
+}
+
+define i32 @test_add_nuw_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741823
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 1073741823
+ %cmp = icmp eq i32 %and, 3
+ %add = add nuw nsw i32 %and, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741823
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 1073741823
+ %cmp = icmp eq i32 %and, 3
+ %add = add nuw i32 %and, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741823
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 1073741823
+ %cmp = icmp eq i32 %and, 3
+ %add = add nsw i32 %and, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741823
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 1073741823
+ %cmp = icmp eq i32 %and, 3
+ %add = add i32 %and, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 2147483647
+ %add = add nuw nsw i32 %and, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 2147483647
+ %add = add nuw i32 %and, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 2147483647
+ %add = add nsw i32 %and, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[AND]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 2147483647
+ %add = add i32 %and, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw_nsw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[OR]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -1
+ %add = add nuw nsw i32 %or, 1
+ %sel = select i1 %cmp, i32 0, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[OR]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -1
+ %add = add nuw i32 %or, 1
+ %sel = select i1 %cmp, i32 0, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nsw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OR]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -1
+ %add = add nsw i32 %or, 1
+ %sel = select i1 %cmp, i32 0, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_add__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[OR]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -1
+ %add = add i32 %or, 1
+ %sel = select i1 %cmp, i32 0, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[X]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 3
+ %add = add nuw nsw i32 %x, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nuw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nuw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[X]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 3
+ %add = add nuw i32 %x, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 4, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 3
+ %add = add nsw i32 %x, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_add__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_add__none_are_safe(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %cmp = icmp eq i32 %x, 3
+ %add = add i32 %x, 1
+ %sel = select i1 %cmp, i32 4, i32 %add
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 6
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -254, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -260, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 6
+ %sub = sub nuw nsw i32 -254, %and
+ %sel = select i1 %cmp, i32 -260, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 6
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -254, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -260, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 6
+ %sub = sub nuw i32 -254, %and
+ %sel = select i1 %cmp, i32 -260, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 6
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -254, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -260, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 6
+ %sub = sub nsw i32 -254, %and
+ %sel = select i1 %cmp, i32 -260, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 6
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -254, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -260, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 6
+ %sub = sub i32 -254, %and
+ %sel = select i1 %cmp, i32 -260, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1073741824
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -2147483648, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 1073741824, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 1073741824
+ %sub = sub nuw nsw i32 -2147483648, %and
+ %sel = select i1 %cmp, i32 1073741824, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1073741824
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 -2147483648, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 1073741824, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 1073741824
+ %sub = sub nuw i32 -2147483648, %and
+ %sel = select i1 %cmp, i32 1073741824, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1073741824
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -2147483648, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 1073741824, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 1073741824
+ %sub = sub nsw i32 -2147483648, %and
+ %sel = select i1 %cmp, i32 1073741824, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1073741824
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 -2147483648, [[AND]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 1073741824, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2147483647
+ %cmp = icmp eq i32 %and, 1073741824
+ %sub = sub i32 -2147483648, %and
+ %sel = select i1 %cmp, i32 1073741824, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw_nsw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -2147483647
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -2147483648, [[OR]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -2147483647
+ %sub = sub nuw nsw i32 -2147483648, %or
+ %sel = select i1 %cmp, i32 -1, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -2147483647
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -2147483648, [[OR]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -2147483647
+ %sub = sub nuw i32 -2147483648, %or
+ %sel = select i1 %cmp, i32 -1, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nsw__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -2147483647
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 -2147483648, [[OR]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -2147483647
+ %sub = sub nsw i32 -2147483648, %or
+ %sel = select i1 %cmp, i32 -1, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_sub__nsw_is_safe(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], -2147483647
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 -2147483648, [[OR]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %or = or i32 %x, -2147483648
+ %cmp = icmp eq i32 %or, -2147483647
+ %sub = sub i32 -2147483648, %or
+ %sel = select i1 %cmp, i32 -1, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 -2147483648, [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 1
+ %sub = sub nuw nsw i32 -2147483648, %x
+ %sel = select i1 %cmp, i32 2147483647, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nuw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nuw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 -2147483648, [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 1
+ %sub = sub nuw i32 -2147483648, %x
+ %sel = select i1 %cmp, i32 2147483647, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 -2147483648, [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 1
+ %sub = sub nsw i32 -2147483648, %x
+ %sel = select i1 %cmp, i32 2147483647, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_sub__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_sub__none_are_safe(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 -2147483648, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %cmp = icmp eq i32 %x, 1
+ %sub = sub i32 -2147483648, %x
+ %sel = select i1 %cmp, i32 2147483647, i32 %sub
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 17
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 153, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 17
+ %mul = mul nuw nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 153, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 17
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 153, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 17
+ %mul = mul nuw i32 %and, 9
+ %sel = select i1 %cmp, i32 153, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nsw__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nsw__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 17
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 153, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 17
+ %mul = mul nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 153, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul__all_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul__all_are_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 17
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 153, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 255
+ %cmp = icmp eq i32 %and, 17
+ %mul = mul i32 %and, 9
+ %sel = select i1 %cmp, i32 153, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 268435457
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 268435456
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1879048192, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 268435457
+ %cmp = icmp eq i32 %and, 268435456
+ %mul = mul nuw nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 -1879048192, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 268435457
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 268435456
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1879048192, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 268435457
+ %cmp = icmp eq i32 %and, 268435456
+ %mul = mul nuw i32 %and, 9
+ %sel = select i1 %cmp, i32 -1879048192, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nsw__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nsw__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 268435457
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 268435456
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1879048192, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 268435457
+ %cmp = icmp eq i32 %and, 268435456
+ %mul = mul nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 -1879048192, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul__nuw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul__nuw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 268435457
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 268435456
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1879048192, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 268435457
+ %cmp = icmp eq i32 %and, 268435456
+ %mul = mul i32 %and, 9
+ %sel = select i1 %cmp, i32 -1879048192, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw_nsw__nsw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -83886079
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -754974711, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = or i32 %x, -83886080
+ %cmp = icmp eq i32 %and, -83886079
+ %mul = mul nuw nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 -754974711, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw__nsw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -83886079
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -754974711, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = or i32 %x, -83886080
+ %cmp = icmp eq i32 %and, -83886079
+ %mul = mul nuw i32 %and, 9
+ %sel = select i1 %cmp, i32 -754974711, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nsw__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nsw__nsw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -83886079
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -754974711, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = or i32 %x, -83886080
+ %cmp = icmp eq i32 %and, -83886079
+ %mul = mul nsw i32 %and, 9
+ %sel = select i1 %cmp, i32 -754974711, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul__nsw_is_safe(i32 %x) {
+; CHECK-LABEL: @test_mul__nsw_is_safe(
+; CHECK-NEXT: [[AND:%.*]] = or i32 [[X:%.*]], -83886080
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -83886079
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[AND]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -754974711, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = or i32 %x, -83886080
+ %cmp = icmp eq i32 %and, -83886079
+ %mul = mul i32 %and, 9
+ %sel = select i1 %cmp, i32 -754974711, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 805306368
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[X]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1342177280, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 805306368
+ %mul = mul nuw nsw i32 %x, 9
+ %sel = select i1 %cmp, i32 -1342177280, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nuw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nuw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 805306368
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1342177280, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 805306368
+ %mul = mul nuw i32 %x, 9
+ %sel = select i1 %cmp, i32 -1342177280, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul_nsw__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul_nsw__none_are_safe(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 805306368
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X]], 9
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -1342177280, i32 [[MUL]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 805306368
+ %mul = mul nsw i32 %x, 9
+ %sel = select i1 %cmp, i32 -1342177280, i32 %mul
+ ret i32 %sel
+}
+
+define i32 @test_mul__none_are_safe(i32 %x) {
+; CHECK-LABEL: @test_mul__none_are_safe(
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], 9
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %cmp = icmp eq i32 %x, 805306368
+ %mul = mul i32 %x, 9
+ %sel = select i1 %cmp, i32 -1342177280, i32 %mul
+ ret i32 %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
new file mode 100644
index 00000000000..d9bef00b2f7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
@@ -0,0 +1,654 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=36950
+
+; These all should be just and+icmp, there should be no select.
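+; For example, @and_lshr_and computes ((arg & 1) == 0) ? ((arg >> 1) & 1) : 1,
+; which is 1 exactly when either of the two low bits is set, i.e. zext((arg & 3) != 0).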
+
+define i32 @and_lshr_and(i32 %arg) {
+; CHECK-LABEL: @and_lshr_and(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARG:%.*]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = lshr i32 %arg, 1
+ %tmp3 = and i32 %tmp2, 1
+ %tmp4 = select i1 %tmp1, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+define <2 x i32> @and_lshr_and_splatvec(<2 x i32> %arg) {
+; CHECK-LABEL: @and_lshr_and_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %tmp = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = lshr <2 x i32> %arg, <i32 1, i32 1>
+ %tmp3 = and <2 x i32> %tmp2, <i32 1, i32 1>
+ %tmp4 = select <2 x i1> %tmp1, <2 x i32> %tmp3, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @and_lshr_and_vec_v0(<2 x i32> %arg) {
+; CHECK-LABEL: @and_lshr_and_vec_v0(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %tmp = and <2 x i32> %arg, <i32 1, i32 4> ; mask is not splat
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = lshr <2 x i32> %arg, <i32 1, i32 1>
+ %tmp3 = and <2 x i32> %tmp2, <i32 1, i32 1>
+ %tmp4 = select <2 x i1> %tmp1, <2 x i32> %tmp3, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @and_lshr_and_vec_v1(<2 x i32> %arg) {
+; CHECK-LABEL: @and_lshr_and_vec_v1(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 5>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %tmp = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = lshr <2 x i32> %arg, <i32 1, i32 2> ; shift is not splat
+ %tmp3 = and <2 x i32> %tmp2, <i32 1, i32 1>
+ %tmp4 = select <2 x i1> %tmp1, <2 x i32> %tmp3, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp4
+}
+
+define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) {
+; CHECK-LABEL: @and_lshr_and_vec_v2(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 12, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %tmp = and <2 x i32> %arg, <i32 8, i32 1> ; mask is not splat
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = lshr <2 x i32> %arg, <i32 2, i32 1> ; shift is not splat
+ %tmp3 = and <2 x i32> %tmp2, <i32 1, i32 1>
+ %tmp4 = select <2 x i1> %tmp1, <2 x i32> %tmp3, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp4
+}
+
+define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) {
+; CHECK-LABEL: @and_lshr_and_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 undef, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP4]]
+;
+ %tmp = and <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp1 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp2 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp3 = and <3 x i32> %tmp2, <i32 1, i32 undef, i32 1>
+ %tmp4 = select <3 x i1> %tmp1, <3 x i32> %tmp3, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp4
+}
+
+define i32 @and_and(i32 %arg) {
+; CHECK-LABEL: @and_and(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARG:%.*]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = and i32 %arg, 2
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = and i32 %arg, 1
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 1
+ ret i32 %tmp3
+}
+
+define <2 x i32> @and_and_splatvec(<2 x i32> %arg) {
+; CHECK-LABEL: @and_and_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp = and <2 x i32> %arg, <i32 2, i32 2>
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp3 = select <2 x i1> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @and_and_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @and_and_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 7, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp = and <2 x i32> %arg, <i32 6, i32 2> ; mask is not splat
+ %tmp1 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp2 = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp3 = select <2 x i1> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp3
+}
+
+define <3 x i32> @and_and_vec_undef(<3 x i32> %arg) {
+; CHECK-LABEL: @and_and_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 -1, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP3]]
+;
+ %tmp = and <3 x i32> %arg, <i32 2, i32 undef, i32 2>
+ %tmp1 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp2 = and <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp3 = select <3 x i1> %tmp1, <3 x i32> %tmp2, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp3
+}
+
+; ============================================================================ ;
+; Mask can be a variable, too.
+; ============================================================================ ;
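+; As the checks below show, e.g. @f_var0 folds
+;   ((x & m) == 0) ? ((x >> 1) & 1) : 1   -->   zext((x & (m | 2)) != 0)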
+
+define i32 @f_var0(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @f_var0(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %tmp = and i32 %arg, %arg1
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = lshr i32 %arg, 1
+ %tmp4 = and i32 %tmp3, 1
+ %tmp5 = select i1 %tmp2, i32 %tmp4, i32 1
+ ret i32 %tmp5
+}
+
+; Should fold exactly like the previous one
+define i32 @f_var0_commutative_and(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @f_var0_commutative_and(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %tmp = and i32 %arg1, %arg ; in different order
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = lshr i32 %arg, 1
+ %tmp4 = and i32 %tmp3, 1
+ %tmp5 = select i1 %tmp2, i32 %tmp4, i32 1
+ ret i32 %tmp5
+}
+
+define <2 x i32> @f_var0_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
+; CHECK-LABEL: @f_var0_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp = and <2 x i32> %arg, %arg1
+ %tmp2 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp3 = lshr <2 x i32> %arg, <i32 1, i32 1>
+ %tmp4 = and <2 x i32> %tmp3, <i32 1, i32 1>
+ %tmp5 = select <2 x i1> %tmp2, <2 x i32> %tmp4, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp5
+}
+
+define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) {
+; CHECK-LABEL: @f_var0_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 2, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp = and <2 x i32> %arg, %arg1
+ %tmp2 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp3 = lshr <2 x i32> %arg, <i32 1, i32 2> ; shift is not splat
+ %tmp4 = and <2 x i32> %tmp3, <i32 1, i32 1>
+ %tmp5 = select <2 x i1> %tmp2, <2 x i32> %tmp4, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp5
+}
+
+define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) {
+; CHECK-LABEL: @f_var0_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 2, i32 undef, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP5]]
+;
+ %tmp = and <3 x i32> %arg, %arg1
+ %tmp2 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp3 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp4 = and <3 x i32> %tmp3, <i32 1, i32 undef, i32 1>
+ %tmp5 = select <3 x i1> %tmp2, <3 x i32> %tmp4, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp5
+}
+
+define i32 @f_var1(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @f_var1(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, %arg1
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = and i32 %arg, 1
+ %tmp4 = select i1 %tmp2, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+; Should fold exactly like the previous one
+define i32 @f_var1_commutative_and(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @f_var1_commutative_and(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARG1:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg1, %arg ; in different order
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = and i32 %arg, 1
+ %tmp4 = select i1 %tmp2, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) {
+; CHECK-LABEL: @f_var1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %tmp = and <2 x i32> %arg, %arg1
+ %tmp2 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp3 = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp4 = select <2 x i1> %tmp2, <2 x i32> %tmp3, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp4
+}
+
+define <3 x i32> @f_var1_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) {
+; CHECK-LABEL: @f_var1_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP4]]
+;
+ %tmp = and <3 x i32> %arg, %arg1
+ %tmp2 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp3 = and <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp4 = select <3 x i1> %tmp2, <3 x i32> %tmp3, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp4
+}
+
+; ============================================================================ ;
+; Shift can be a variable, too.
+; ============================================================================ ;
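+; As the checks below show, e.g. @f_var2 folds
+;   ((x & 1) == 0) ? ((x >> s) & 1) : 1   -->   zext((x & ((1 << s) | 1)) != 0)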
+
+define i32 @f_var2(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @f_var2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = lshr i32 %arg, %arg1
+ %tmp4 = and i32 %tmp3, 1
+ %tmp5 = select i1 %tmp2, i32 %tmp4, i32 1
+ ret i32 %tmp5
+}
+
+define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
+; CHECK-LABEL: @f_var2_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp = and <2 x i32> %arg, <i32 1, i32 1>
+ %tmp2 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp3 = lshr <2 x i32> %arg, %arg1
+ %tmp4 = and <2 x i32> %tmp3, <i32 1, i32 1>
+ %tmp5 = select <2 x i1> %tmp2, <2 x i32> %tmp4, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp5
+}
+
+define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) {
+; CHECK-LABEL: @f_var2_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 2, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp = and <2 x i32> %arg, <i32 2, i32 1> ; mask is not splat
+ %tmp2 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp3 = lshr <2 x i32> %arg, %arg1
+ %tmp4 = and <2 x i32> %tmp3, <i32 1, i32 1>
+ %tmp5 = select <2 x i1> %tmp2, <2 x i32> %tmp4, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp5
+}
+
+define <3 x i32> @f_var2_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) {
+; CHECK-LABEL: @f_var2_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> <i32 1, i32 1, i32 1>, [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <3 x i32> [[TMP1]], <i32 1, i32 undef, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <3 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP4]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP5]]
+;
+ %tmp = and <3 x i32> %arg, <i32 1, i32 undef, i32 1>
+ %tmp2 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp3 = lshr <3 x i32> %arg, %arg1
+ %tmp4 = and <3 x i32> %tmp3, <i32 1, i32 undef, i32 1>
+ %tmp5 = select <3 x i1> %tmp2, <3 x i32> %tmp4, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp5
+}
+
+; ============================================================================ ;
+; The worst case: both Mask and Shift are variables
+; ============================================================================ ;
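+; As the checks below show, e.g. @f_var3 folds
+;   ((x & m) == 0) ? ((x >> s) & 1) : 1   -->   zext((x & ((1 << s) | m)) != 0)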
+
+define i32 @f_var3(i32 %arg, i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: @f_var3(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %tmp = and i32 %arg, %arg1
+ %tmp3 = icmp eq i32 %tmp, 0
+ %tmp4 = lshr i32 %arg, %arg2
+ %tmp5 = and i32 %tmp4, 1
+ %tmp6 = select i1 %tmp3, i32 %tmp5, i32 1
+ ret i32 %tmp6
+}
+
+; Should fold exactly like the previous one
+define i32 @f_var3_commutative_and(i32 %arg, i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: @f_var3_commutative_and(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, [[ARG2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %tmp = and i32 %arg1, %arg ; in different order
+ %tmp3 = icmp eq i32 %tmp, 0
+ %tmp4 = lshr i32 %arg, %arg2
+ %tmp5 = and i32 %tmp4, 1
+ %tmp6 = select i1 %tmp3, i32 %tmp5, i32 1
+ ret i32 %tmp6
+}
+
+define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %arg2) {
+; CHECK-LABEL: @f_var3_splatvec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[ARG2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP6]]
+;
+ %tmp = and <2 x i32> %arg, %arg1
+ %tmp3 = icmp eq <2 x i32> %tmp, zeroinitializer
+ %tmp4 = lshr <2 x i32> %arg, %arg2
+ %tmp5 = and <2 x i32> %tmp4, <i32 1, i32 1>
+ %tmp6 = select <2 x i1> %tmp3, <2 x i32> %tmp5, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32> %tmp6
+}
+
+define <3 x i32> @f_var3_vec_undef(<3 x i32> %arg, <3 x i32> %arg1, <3 x i32> %arg2) {
+; CHECK-LABEL: @f_var3_vec_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> <i32 1, i32 1, i32 1>, [[ARG2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <3 x i32> [[TMP1]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <3 x i32> [[TMP2]], [[ARG:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <3 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = zext <3 x i1> [[TMP4]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[TMP6]]
+;
+ %tmp = and <3 x i32> %arg, %arg1
+ %tmp3 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
+ %tmp4 = lshr <3 x i32> %arg, %arg2
+ %tmp5 = and <3 x i32> %tmp4, <i32 1, i32 undef, i32 1>
+ %tmp6 = select <3 x i1> %tmp3, <3 x i32> %tmp5, <3 x i32> <i32 1, i32 undef, i32 1>
+ ret <3 x i32> %tmp6
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One-use constraint: extra uses of the intermediate values should prevent the fold.
+
+declare void @use32(i32)
+
+declare void @use1(i1)
+
+define i32 @n_var0_oneuse(i32 %arg, i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: @n_var0_oneuse(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[ARG]], [[ARG2:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: call void @use32(i32 [[TMP]])
+; CHECK-NEXT: call void @use1(i1 [[TMP3]])
+; CHECK-NEXT: call void @use32(i32 [[TMP4]])
+; CHECK-NEXT: call void @use32(i32 [[TMP5]])
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %tmp = and i32 %arg, %arg1
+ %tmp3 = icmp eq i32 %tmp, 0
+ %tmp4 = lshr i32 %arg, %arg2
+ %tmp5 = and i32 %tmp4, 1
+ %tmp6 = select i1 %tmp3, i32 %tmp5, i32 1
+ call void @use32(i32 %tmp)
+ call void @use1(i1 %tmp3)
+ call void @use32(i32 %tmp4)
+ call void @use32(i32 %tmp5)
+ ret i32 %tmp6
+}
+
+define i32 @n_var1_oneuse(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @n_var1_oneuse(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[ARG]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 1
+; CHECK-NEXT: call void @use32(i32 [[TMP]])
+; CHECK-NEXT: call void @use1(i1 [[TMP2]])
+; CHECK-NEXT: call void @use32(i32 [[TMP3]])
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, %arg1
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = and i32 %arg, 1
+ %tmp4 = select i1 %tmp2, i32 %tmp3, i32 1
+ call void @use32(i32 %tmp)
+ call void @use1(i1 %tmp2)
+ call void @use32(i32 %tmp3)
+ ret i32 %tmp4
+}
+
+; Different variables are used
+
+define i32 @n0(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[ARG1:%.*]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = lshr i32 %arg1, 1 ; works on %arg1 instead of %arg
+ %tmp4 = and i32 %tmp3, 1
+ %tmp5 = select i1 %tmp2, i32 %tmp4, i32 1
+ ret i32 %tmp5
+}
+
+define i32 @n1(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[ARG1:%.*]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 1
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 2
+ %tmp2 = icmp eq i32 %tmp, 0
+ %tmp3 = and i32 %arg1, 1 ; works on %arg1 instead of %arg
+ %tmp4 = select i1 %tmp2, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+; False-value is not 1
+
+define i32 @n2(i32 %arg) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 [[TMP3]], i32 0
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = lshr i32 %arg, 2
+ %tmp3 = and i32 %tmp2, 1
+ %tmp4 = select i1 %tmp1, i32 %tmp3, i32 0 ; 0 instead of 1
+ ret i32 %tmp4
+}
+
+define i32 @n3(i32 %arg) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[TMP2]], i32 0
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = and i32 %arg, 2
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = and i32 %arg, 1
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 0 ; 0 instead of 1
+ ret i32 %tmp3
+}
+
+; Mask of the second 'and' is not 1
+
+define i32 @n4(i32 %arg) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 [[TMP3]], i32 1
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = lshr i32 %arg, 2
+ %tmp3 = and i32 %tmp2, 2 ; 2 instead of 1
+ %tmp4 = select i1 %tmp1, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+define i32 @n5(i32 %arg) {
+; CHECK-LABEL: @n5(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = and i32 %arg, 2
+ %tmp1 = icmp eq i32 %tmp, 0
+ %tmp2 = and i32 %arg, 2 ; 2 instead of 1
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 1
+ ret i32 %tmp3
+}
+
+; Wrong icmp pred
+
+define i32 @n6(i32 %arg) {
+; CHECK-LABEL: @n6(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp1 = icmp ne i32 %tmp, 0 ; ne, not eq
+ %tmp2 = lshr i32 %arg, 2
+ %tmp3 = and i32 %tmp2, 1
+ %tmp4 = select i1 %tmp1, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
+
+define i32 @n7(i32 %arg) {
+; CHECK-LABEL: @n7(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = and i32 %arg, 2
+ %tmp1 = icmp ne i32 %tmp, 0 ; ne, not eq
+ %tmp2 = and i32 %arg, 1
+ %tmp3 = select i1 %tmp1, i32 %tmp2, i32 1
+ ret i32 %tmp3
+}
+
+; icmp second operand is not zero
+
+define i32 @n8(i32 %arg) {
+; CHECK-LABEL: @n8(
+; CHECK-NEXT: [[TMP:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[ARG]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP3]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %tmp = and i32 %arg, 1
+ %tmp1 = icmp eq i32 %tmp, 1
+ %tmp2 = lshr i32 %arg, 2
+ %tmp3 = and i32 %tmp2, 1
+ %tmp4 = select i1 %tmp1, i32 %tmp3, i32 1
+ ret i32 %tmp4
+}
diff --git a/llvm/test/Transforms/InstCombine/select-pr39595.ll b/llvm/test/Transforms/InstCombine/select-pr39595.ll
new file mode 100644
index 00000000000..0f88d66e4d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-pr39595.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: foo
+; CHECK: [[TMP1:%.*]] = icmp ult i32 %y, %x
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 %x, i32 %y, !prof ![[$MD0:[0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[TMP3]]
+; CHECK-DAG: !0 = !{!"branch_weights", i32 6, i32 1}
+
+ %1 = xor i32 %x, -1
+ %2 = xor i32 %y, -1
+ %3 = icmp ugt i32 %1, %2
+ %4 = select i1 %3, i32 %2, i32 %1, !prof !1
+ ret i32 %4
+}
+
+!1 = !{!"branch_weights", i32 1, i32 6}
diff --git a/llvm/test/Transforms/InstCombine/select-select.ll b/llvm/test/Transforms/InstCombine/select-select.ll
new file mode 100644
index 00000000000..768d1c47c20
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-select.ll
@@ -0,0 +1,34 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK: @foo1
+define float @foo1(float %a) #0 {
+; CHECK-NOT: xor
+ %b = fcmp ogt float %a, 0.000000e+00
+ %c = select i1 %b, float %a, float 0.000000e+00
+ %d = fcmp olt float %c, 1.000000e+00
+ %f = select i1 %d, float %c, float 1.000000e+00
+ ret float %f
+}
+
+; CHECK: @foo2
+define float @foo2(float %a) #0 {
+; CHECK-NOT: xor
+ %b = fcmp ogt float %a, 0.000000e+00
+ %c = select i1 %b, float %a, float 0.000000e+00
+ %d = fcmp olt float %c, 1.000000e+00
+ %e = select i1 %b, float %a, float 0.000000e+00
+ %f = select i1 %d, float %e, float 1.000000e+00
+ ret float %f
+}
+
+; CHECK-LABEL: @foo3
+define <2 x i32> @foo3(<2 x i1> %vec_bool, i1 %bool, <2 x i32> %V) {
+; CHECK: %[[sel0:.*]] = select <2 x i1> %vec_bool, <2 x i32> zeroinitializer, <2 x i32> %V
+; CHECK: %[[sel1:.*]] = select i1 %bool, <2 x i32> %[[sel0]], <2 x i32> %V
+; CHECK: ret <2 x i32> %[[sel1]]
+ %sel0 = select <2 x i1> %vec_bool, <2 x i32> zeroinitializer, <2 x i32> %V
+ %sel1 = select i1 %bool, <2 x i32> %sel0, <2 x i32> %V
+ ret <2 x i32> %sel1
+}
+
+attributes #0 = { nounwind readnone ssp uwtable }
diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
new file mode 100644
index 00000000000..8acf49f0ef8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -0,0 +1,1451 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "n8:16:32:64"
+
+define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2(
+; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec(
+; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %and = and <2 x i32> %x, <i32 1, i32 1>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 2, i32 2>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_eq_and_1_0_xor_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_1_0_xor_2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_and_1_0_and_not_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_1_0_and_not_2(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -3
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8(
+; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 8
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_eq_and_32_0_or_8_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8_vec(
+; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 8, i32 8>
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %and = and <2 x i32> %x, <i32 32, i32 32>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 8, i32 8>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_eq_and_32_0_xor_8(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_32_0_xor_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 8
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_and_32_0_and_not_8(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_32_0_and_not_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -9
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -9
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_ne_0_and_4096_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 4096, i32 4096>
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP2]]
+;
+ %and = and <2 x i32> %x, <i32 4096, i32 4096>
+ %cmp = icmp ne <2 x i32> zeroinitializer, %and
+ %or = or <2 x i32> %y, <i32 4096, i32 4096>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_xor_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_xor_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_and_not_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_and_not_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_eq_and_4096_0_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096>
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[AND]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %and = and <2 x i32> %x, <i32 4096, i32 4096>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 4096, i32 4096>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_eq_and_4096_0_xor_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_4096_0_xor_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_and_4096_0_and_not_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_and_4096_0_and_not_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %and = and i64 %x, 1
+ %cmp = icmp eq i64 %and, 0
+ %or = or i32 %y, 1
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_eq_0_and_1_or_1_vec(<2 x i64> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %and = and <2 x i64> %x, <i64 1, i64 1>
+ %cmp = icmp eq <2 x i64> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 1, i32 1>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_eq_0_and_1_xor_1(i64 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_0_and_1_xor_1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT: [[SELECT:%.*]] = xor i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i64 %x, 1
+ %cmp = icmp eq i64 %and, 0
+ %xor = xor i32 %y, 1
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_eq_0_and_1_and_not_1(i64 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_eq_0_and_1_and_not_1(
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i64 %x, 1
+ %cmp = icmp eq i64 %and, 0
+ %and2 = and i32 %y, -2
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_or_32(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_32(
+; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 32
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 32
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_xor_32(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_xor_32(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 32
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 32
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_4096_and_not_32(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_4096_and_not_32(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -33
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -33
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096(
+; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 4096
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @select_icmp_ne_0_and_32_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096_vec(
+; CHECK-NEXT: [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 4096, i32 4096>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], <i32 4096, i32 4096>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %and = and <2 x i32> %x, <i32 32, i32 32>
+ %cmp = icmp ne <2 x i32> zeroinitializer, %and
+ %or = or <2 x i32> %y, <i32 4096, i32 4096>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_ne_0_and_32_xor_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_32_xor_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_32_and_not_4096(i32 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_32_and_not_4096(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 32
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 32
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[SELECT]]
+;
+ %and = and i32 %x, 1073741824
+ %cmp = icmp ne i32 0, %and
+ %or = or i8 %y, 8
+ %select = select i1 %cmp, i8 %y, i8 %or
+ ret i8 %select
+}
+
+define i8 @select_icmp_ne_0_and_1073741824_xor_8(i32 %x, i8 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_xor_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[Y:%.*]], 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[XOR]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[SELECT]]
+;
+ %and = and i32 %x, 1073741824
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i8 %y, 8
+ %select = select i1 %cmp, i8 %y, i8 %xor
+ ret i8 %select
+}
+
+define i8 @select_icmp_ne_0_and_1073741824_and_not_8(i32 %x, i8 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_and_not_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i8 [[Y:%.*]], -9
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[AND2]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[SELECT]]
+;
+ %and = and i32 %x, 1073741824
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i8 %y, -9
+ %select = select i1 %cmp, i8 %y, i8 %and2
+ ret i8 %select
+}
+
+define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 1073741824
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i8 %x, 8
+ %cmp = icmp ne i8 0, %and
+ %or = or i32 %y, 1073741824
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_8_xor_1073741824(i8 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_8_xor_1073741824(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 1073741824
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i8 %x, 8
+ %cmp = icmp ne i8 0, %and
+ %xor = xor i32 %y, 1073741824
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @select_icmp_ne_0_and_8_and_not_1073741824(i8 %x, i32 %y) {
+; CHECK-LABEL: @select_icmp_ne_0_and_8_and_not_1073741824(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -1073741825
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i8 %x, 8
+ %cmp = icmp ne i8 0, %and
+ %and2 = and i32 %y, -1073741825
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+; We can't combine here, because the cmp is scalar and the or is a vector.
+; Just make sure we don't assert.
+define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
+; CHECK-LABEL: @select_icmp_eq_and_1_0_or_vector_of_2s(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[Y:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], <2 x i32> [[Y]], <2 x i32> [[OR]]
+; CHECK-NEXT: ret <2 x i32> [[SELECT]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or <2 x i32> %y, <i32 2, i32 2>
+ %select = select i1 %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @select_icmp_and_8_ne_0_xor_8(i32 %x) {
+; CHECK-LABEL: @select_icmp_and_8_ne_0_xor_8(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -9
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 8
+ %x.xor = select i1 %cmp, i32 %x, i32 %xor
+ ret i32 %x.xor
+}
+
+define i32 @select_icmp_and_8_eq_0_xor_8(i32 %x) {
+; CHECK-LABEL: @select_icmp_and_8_eq_0_xor_8(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 8
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 8
+ %xor.x = select i1 %cmp, i32 %xor, i32 %x
+ ret i32 %xor.x
+}
+
+define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[Y:%.*]], 8
+; CHECK-NEXT: [[Y_XOR:%.*]] = select i1 [[CMP]], i64 [[Y]], i64 [[XOR]]
+; CHECK-NEXT: ret i64 [[Y_XOR]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i64 %y, 8
+ %y.xor = select i1 %cmp, i64 %y, i64 %xor
+ ret i64 %y.xor
+}
+
+define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[Y:%.*]], 8
+; CHECK-NEXT: [[XOR_Y:%.*]] = select i1 [[CMP]], i64 [[XOR]], i64 [[Y]]
+; CHECK-NEXT: ret i64 [[XOR_Y]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i64 %y, 8
+ %xor.y = select i1 %cmp, i64 %xor, i64 %y
+ ret i64 %xor.y
+}
+
+define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %or = or i64 %y, 8
+ %or.y = select i1 %cmp, i64 %or, i64 %y
+ ret i64 %or.y
+}
+
+define <2 x i64> @select_icmp_x_and_8_ne_0_y_or_8_vec(<2 x i32> %x, <2 x i64> %y) {
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 8, i32 8>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %and = and <2 x i32> %x, <i32 8, i32 8>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ %or = or <2 x i64> %y, <i64 8, i64 8>
+ %or.y = select <2 x i1> %cmp, <2 x i64> %or, <2 x i64> %y
+ ret <2 x i64> %or.y
+}
+
+define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[Y:%.*]], -9
+; CHECK-NEXT: [[AND_Y:%.*]] = select i1 [[CMP]], i64 [[AND2]], i64 [[Y]]
+; CHECK-NEXT: ret i64 [[AND_Y]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i64 %y, -9
+ %and.y = select i1 %cmp, i64 %and2, i64 %y
+ ret i64 %and.y
+}
+
+define i32 @select_icmp_and_2147483648_ne_0_xor_2147483648(i32 %x) {
+; CHECK-LABEL: @select_icmp_and_2147483648_ne_0_xor_2147483648(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 2147483648
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 2147483648
+ %x.xor = select i1 %cmp, i32 %x, i32 %xor
+ ret i32 %x.xor
+}
+
+define i32 @select_icmp_and_2147483648_eq_0_xor_2147483648(i32 %x) {
+; CHECK-LABEL: @select_icmp_and_2147483648_eq_0_xor_2147483648(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 2147483648
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %x, 2147483648
+ %xor.x = select i1 %cmp, i32 %xor, i32 %x
+ ret i32 %xor.x
+}
+
+define i32 @select_icmp_x_and_2147483648_ne_0_or_2147483648(i32 %x) {
+; CHECK-LABEL: @select_icmp_x_and_2147483648_ne_0_or_2147483648(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %x, 2147483648
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %x, 2147483648
+ %or.x = select i1 %cmp, i32 %or, i32 %x
+ ret i32 %or.x
+}
+
+define i32 @test68(i32 %x, i32 %y) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @test68vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test68vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %and = and <2 x i32> %x, <i32 128, i32 128>
+ %cmp = icmp eq <2 x i32> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 2, i32 2>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @test68_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @test68_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @test68_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @test68_and(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -3
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i32 @test69(i32 %x, i32 %y) {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP4]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp ne i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ ret i32 %select
+}
+
+define <2 x i32> @test69vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test69vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[TMP4]]
+;
+ %and = and <2 x i32> %x, <i32 128, i32 128>
+ %cmp = icmp ne <2 x i32> %and, zeroinitializer
+ %or = or <2 x i32> %y, <i32 2, i32 2>
+ %select = select <2 x i1> %cmp, <2 x i32> %y, <2 x i32> %or
+ ret <2 x i32> %select
+}
+
+define i32 @test69_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @test69_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp ne i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ ret i32 %select
+}
+
+define i32 @test69_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @test69_and(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: ret i32 [[SELECT]]
+;
+ %and = and i32 %x, 128
+ %cmp = icmp ne i32 %and, 0
+ %and2 = and i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ ret i32 %select
+}
+
+define i8 @test70(i8 %x, i8 %y) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[OR]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[SELECT]]
+;
+ %cmp = icmp slt i8 %x, 0
+ %or = or i8 %y, 2
+ %select = select i1 %cmp, i8 %or, i8 %y
+ ret i8 %select
+}
+
+define i32 @shift_no_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_no_xor_multiuse_or(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[AND:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_no_xor_multiuse_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %res = mul i32 %select, %xor ; to bump up use count of the Xor
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_no_xor_multiuse_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -3
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %res = mul i32 %select, %and2 ; to bump up use count of the And
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %res = mul i32 %select, %xor ; to bump up use count of the Xor
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = add i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %and2 = add i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %res = mul i32 %select, %and2 ; to bump up use count of the And
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_xor_multiuse_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %res = mul i32 %select, %xor ; to bump up use count of the Xor
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_shift_xor_multiuse_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %res = mul i32 %select, %and2 ; to bump up use count of the And
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %res = mul i32 %select, %or ; to bump up use count of the Or
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_xor(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_xor_multiuse_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %res = mul i32 %select, %xor ; to bump up use count of the Xor
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @shift_xor_multiuse_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -2049
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[AND2]], i32 [[Y]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -2049
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %res = mul i32 %select, %and2 ; to bump up use count of the and
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_with_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_cmp_with_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_with_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -3
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_with_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_with_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_with_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[W:%.*]], i32 [[Z:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_with_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @no_shift_xor_multiuse_cmp_with_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_with_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_cmp_with_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_with_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_xor_multiuse_cmp_with_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_with_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -2049
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -2049
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ ret i32 %res
+}
+
+define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+define i32 @shift_no_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 2
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %xor ; to bump up the use count of the xor
+ ret i32 %res2
+}
+
+define i32 @shift_no_xor_multiuse_cmp_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_no_xor_multiuse_cmp_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 1
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -3
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %and2 ; to bump up the use count of the and
+ ret i32 %res2
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], [[Y]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP1]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %xor ; to bump up the use count of the xor
+ ret i32 %res2
+}
+
+define i32 @no_shift_no_xor_multiuse_cmp_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_no_xor_multiuse_cmp_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp eq i32 %and, 0
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %and2 ; to bump up the use count of the and
+ ret i32 %res2
+}
+
+define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+define i32 @no_shift_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 4096
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 4096
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %xor ; to bump up the use count of the xor
+ ret i32 %res2
+}
+
+define i32 @no_shift_xor_multiuse_cmp_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @no_shift_xor_multiuse_cmp_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], -4097
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, -4097
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %and2 ; to bump up the use count of the and
+ ret i32 %res2
+}
+
+define i32 @shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %or = or i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %or
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %or ; to bump up the use count of the or
+ ret i32 %res2
+}
+
+define i32 @shift_xor_multiuse_cmp_xor(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_xor(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[XOR]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[XOR]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %xor = xor i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %xor
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %xor ; to bump up the use count of the xor
+ ret i32 %res2
+}
+
+define i32 @shift_xor_multiuse_cmp_and(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @shift_xor_multiuse_cmp_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[AND2]]
+; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
+; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[AND2]]
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %and = and i32 %x, 4096
+ %cmp = icmp ne i32 0, %and
+ %and2 = and i32 %y, 2048
+ %select = select i1 %cmp, i32 %y, i32 %and2
+ %select2 = select i1 %cmp, i32 %z, i32 %w ; to bump up use count of the cmp
+ %res = mul i32 %select, %select2
+ %res2 = mul i32 %res, %and2 ; to bump up the use count of the and
+ ret i32 %res2
+}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
new file mode 100644
index 00000000000..97203768213
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -0,0 +1,1506 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1822
+
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
+
+define i32 @test1(i32 %A, i32 %B) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[B:%.*]]
+;
+ %C = select i1 false, i32 %A, i32 %B
+ ret i32 %C
+}
+
+define i32 @test2(i32 %A, i32 %B) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %C = select i1 true, i32 %A, i32 %B
+ ret i32 %C
+}
+
+
+define i32 @test3(i1 %C, i32 %I) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 [[I:%.*]]
+;
+ %V = select i1 %C, i32 %I, i32 %I
+ ret i32 %V
+}
+
+define i1 @test4(i1 %C) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i1 [[C:%.*]]
+;
+ %V = select i1 %C, i1 true, i1 false
+ ret i1 %V
+}
+
+define i1 @test5(i1 %C) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: ret i1 [[NOT_C]]
+;
+ %V = select i1 %C, i1 false, i1 true
+ ret i1 %V
+}
+
+define i32 @test6(i1 %C) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[V:%.*]] = zext i1 [[C:%.*]] to i32
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %V = select i1 %C, i32 1, i32 0
+ ret i32 %V
+}
+
+define i1 @trueval_is_true(i1 %C, i1 %X) {
+; CHECK-LABEL: @trueval_is_true(
+; CHECK-NEXT: [[R:%.*]] = or i1 [[C:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 true, i1 %X
+ ret i1 %R
+}
+
+define <2 x i1> @trueval_is_true_vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @trueval_is_true_vec(
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> <i1 true, i1 true>, <2 x i1> %X
+ ret <2 x i1> %R
+}
+
+define <2 x i1> @trueval_is_true_vec_undef_elt(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @trueval_is_true_vec_undef_elt(
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> <i1 undef, i1 true>, <2 x i1> %X
+ ret <2 x i1> %R
+}
+
+define i1 @test8(i1 %C, i1 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[R:%.*]] = and i1 [[C:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 %X, i1 false
+ ret i1 %R
+}
+
+define <2 x i1> @test8vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[C:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 false, i1 false>
+ ret <2 x i1> %R
+}
+
+define i1 @test9(i1 %C, i1 %X) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_C]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 false, i1 %X
+ ret i1 %R
+}
+
+define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[NOT_C]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> <i1 false, i1 false>, <2 x i1> %X
+ ret <2 x i1> %R
+}
+
+define i1 @test10(i1 %C, i1 %X) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: [[R:%.*]] = or i1 [[NOT_C]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 %X, i1 true
+ ret i1 %R
+}
+
+define <2 x i1> @test10vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test10vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[NOT_C]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 true, i1 true>
+ ret <2 x i1> %R
+}
+
+define i1 @test23(i1 %a, i1 %b) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = select i1 %a, i1 %b, i1 %a
+ ret i1 %c
+}
+
+define <2 x i1> @test23vec(<2 x i1> %a, <2 x i1> %b) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %c = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %a
+ ret <2 x i1> %c
+}
+
+define i1 @test24(i1 %a, i1 %b) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = select i1 %a, i1 %a, i1 %b
+ ret i1 %c
+}
+
+define <2 x i1> @test24vec(<2 x i1> %a, <2 x i1> %b) {
+; CHECK-LABEL: @test24vec(
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %c = select <2 x i1> %a, <2 x i1> %a, <2 x i1> %b
+ ret <2 x i1> %c
+}
+
+define i1 @test62(i1 %A, i1 %B) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[A:%.*]], true
+; CHECK-NEXT: [[C:%.*]] = and i1 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %not, i1 %B
+ ret i1 %C
+}
+
+define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test62vec(
+; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[NOT]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %not = xor <2 x i1> %A, <i1 true, i1 true>
+ %C = select <2 x i1> %A, <2 x i1> %not, <2 x i1> %B
+ ret <2 x i1> %C
+}
+
+define i1 @test63(i1 %A, i1 %B) {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[A:%.*]], true
+; CHECK-NEXT: [[C:%.*]] = or i1 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %B, i1 %not
+ ret i1 %C
+}
+
+define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test63vec(
+; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[NOT]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %not = xor <2 x i1> %A, <i1 true, i1 true>
+ %C = select <2 x i1> %A, <2 x i1> %B, <2 x i1> %not
+ ret <2 x i1> %C
+}
+
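+; select (A == 0), 0, 1 --> zext (A != 0)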
+define i32 @test11(i32 %a) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[C]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %C = icmp eq i32 %a, 0
+ %R = select i1 %C, i32 0, i32 1
+ ret i32 %R
+}
+
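+; select C, (A | 1), A --> (zext C) | A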
+define i32 @test12(i1 %cond, i32 %a) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[COND:%.*]] to i32
+; CHECK-NEXT: [[C:%.*]] = or i32 [[B]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %b = or i32 %a, 1
+ %c = select i1 %cond, i32 %b, i32 %a
+ ret i32 %c
+}
+
+define <2 x i32> @test12vec(<2 x i1> %cond, <2 x i32> %a) {
+; CHECK-LABEL: @test12vec(
+; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[COND:%.*]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[B]], [[A:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %b = or <2 x i32> %a, <i32 1, i32 1>
+ %c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %a
+ ret <2 x i32> %c
+}
+
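+; select C, (A ashr 1), A --> A ashr (zext C)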
+define i32 @test12a(i1 %cond, i32 %a) {
+; CHECK-LABEL: @test12a(
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[COND:%.*]] to i32
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %b = ashr i32 %a, 1
+ %c = select i1 %cond, i32 %b, i32 %a
+ ret i32 %c
+}
+
+define <2 x i32> @test12avec(<2 x i1> %cond, <2 x i32> %a) {
+; CHECK-LABEL: @test12avec(
+; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[COND:%.*]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %b = ashr <2 x i32> %a, <i32 1, i32 1>
+ %c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %a
+ ret <2 x i32> %c
+}
+
+define i32 @test12b(i1 %cond, i32 %a) {
+; CHECK-LABEL: @test12b(
+; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[NOT_COND]] to i32
+; CHECK-NEXT: [[D:%.*]] = ashr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %b = ashr i32 %a, 1
+ %d = select i1 %cond, i32 %a, i32 %b
+ ret i32 %d
+}
+
+define <2 x i32> @test12bvec(<2 x i1> %cond, <2 x i32> %a) {
+; CHECK-LABEL: @test12bvec(
+; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[NOT_COND]] to <2 x i32>
+; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+ %b = ashr <2 x i32> %a, <i32 1, i32 1>
+ %d = select <2 x i1> %cond, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %d
+}
+
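+; When the condition compares the two select operands, the select simplifies to one operand.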
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i32 [[B:%.*]]
+;
+ %C = icmp eq i32 %a, %b
+ %V = select i1 %C, i32 %a, i32 %b
+ ret i32 %V
+}
+
+define i32 @test13a(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13a(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %C = icmp ne i32 %a, %b
+ %V = select i1 %C, i32 %a, i32 %b
+ ret i32 %V
+}
+
+define i32 @test13b(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13b(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %C = icmp eq i32 %a, %b
+ %V = select i1 %C, i32 %b, i32 %a
+ ret i32 %V
+}
+
+define i1 @test14a(i1 %C, i32 %X) {
+; CHECK-LABEL: @test14a(
+; CHECK-NEXT: [[R1:%.*]] = icmp slt i32 [[X:%.*]], 1
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: [[R:%.*]] = or i1 [[R1]], [[NOT_C]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %V = select i1 %C, i32 %X, i32 0
+ ; (X < 1) | !C
+ %R = icmp slt i32 %V, 1
+ ret i1 %R
+}
+
+define i1 @test14b(i1 %C, i32 %X) {
+; CHECK-LABEL: @test14b(
+; CHECK-NEXT: [[R1:%.*]] = icmp slt i32 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = or i1 [[R1]], [[C:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %V = select i1 %C, i32 0, i32 %X
+ ; (X < 1) | C
+ %R = icmp slt i32 %V, 1
+ ret i1 %R
+}
+
+define i32 @test16(i1 %C, i32* %P) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %P2 = select i1 %C, i32* %P, i32* null
+ %V = load i32, i32* %P2
+ ret i32 %V
+}
+
+;; It may be legal to load from a null address in a non-zero address space
+define i32 @test16_neg(i1 %C, i32 addrspace(1)* %P) {
+; CHECK-LABEL: @test16_neg(
+; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32 addrspace(1)* [[P:%.*]], i32 addrspace(1)* null
+; CHECK-NEXT: [[V:%.*]] = load i32, i32 addrspace(1)* [[P2]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
+ %V = load i32, i32 addrspace(1)* %P2
+ ret i32 %V
+}
+
+define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
+; CHECK-LABEL: @test16_neg2(
+; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32 addrspace(1)* null, i32 addrspace(1)* [[P:%.*]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32 addrspace(1)* [[P2]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
+ %V = load i32, i32 addrspace(1)* %P2
+ ret i32 %V
+}
+
+;; It may be legal to load from a null address when the 'null-pointer-is-valid' attribute is set.
+define i32 @test16_no_null_opt(i1 %C, i32* %P) #0 {
+; CHECK-LABEL: @test16_no_null_opt(
+; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32* [[P:%.*]], i32* null
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P2]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %P2 = select i1 %C, i32* %P, i32* null
+ %V = load i32, i32* %P2
+ ret i32 %V
+}
+
+define i32 @test16_no_null_opt_2(i1 %C, i32* %P) #0 {
+; CHECK-LABEL: @test16_no_null_opt_2(
+; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32* null, i32* [[P:%.*]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P2]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %P2 = select i1 %C, i32* null, i32* %P
+ %V = load i32, i32* %P2
+ ret i32 %V
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
+
+define i1 @test17(i32* %X, i1 %C) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[RV1:%.*]] = icmp eq i32* [[X:%.*]], null
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT: [[RV:%.*]] = or i1 [[RV1]], [[NOT_C]]
+; CHECK-NEXT: ret i1 [[RV]]
+;
+ %R = select i1 %C, i32* %X, i32* null
+ %RV = icmp eq i32* %R, null
+ ret i1 %RV
+}
+
+define i32 @test18(i32 %X, i32 %Y, i1 %C) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[V:%.*]] = sdiv i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %R = select i1 %C, i32 %X, i32 0
+ %V = sdiv i32 %Y, %R
+ ret i32 %V
+}
+
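+; select (X u> 0x7FFFFFFF), -1, 0 --> X ashr 31 (sign-bit splat)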
+define i32 @test19(i32 %x) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %tmp = icmp ugt i32 %x, 2147483647
+ %retval = select i1 %tmp, i32 -1, i32 0
+ ret i32 %retval
+}
+
+define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i32 -1, i32 0
+ ret i32 %retval
+}
+
+define i64 @test21(i32 %x) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X_LOBIT]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i64 -1, i64 0
+ ret i64 %retval
+}
+
+define i16 @test22(i32 %x) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X_LOBIT]] to i16
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i16 -1, i16 0
+ ret i16 %retval
+}
+
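+; A select of constants fed by a two-entry boolean phi folds into the phi itself.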
+define i32 @test25(i1 %c) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
+; CHECK: jump:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, [[JUMP]] ], [ 20, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %c, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %a = phi i1 [true, %jump], [false, %entry]
+ %b = select i1 %a, i32 10, i32 20
+ ret i32 %b
+}
+
+define i32 @test26(i1 %cond) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
+; CHECK: jump:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 20, [[ENTRY:%.*]] ], [ 10, [[JUMP]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ %c = or i1 false, false
+ br label %ret
+ret:
+ %a = phi i1 [true, %entry], [%c, %jump]
+ %b = select i1 %a, i32 20, i32 10
+ ret i32 %b
+}
+
+define i32 @test27(i1 %c, i32 %A, i32 %B) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
+; CHECK: jump:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[P]]
+;
+entry:
+ br i1 %c, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %p = phi i1 [true, %jump], [false, %entry]
+ %s = select i1 %p, i32 %A, i32 %B
+ ret i32 %s
+}
+
+define i32 @test28(i1 %cond, i32 %A, i32 %B) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
+; CHECK: jump:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[P]]
+;
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %c = phi i32 [%A, %jump], [%B, %entry]
+ %p = phi i1 [true, %jump], [false, %entry]
+ %s = select i1 %p, i32 %A, i32 %c
+ ret i32 %s
+}
+
+define i32 @test29(i1 %cond, i32 %A, i32 %B) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
+; CHECK: jump:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[NEXT:%.*]]
+; CHECK: next:
+; CHECK-NEXT: ret i32 [[P]]
+;
+entry:
+ br i1 %cond, label %jump, label %ret
+jump:
+ br label %ret
+ret:
+ %c = phi i32 [%A, %jump], [%B, %entry]
+ %p = phi i1 [true, %jump], [false, %entry]
+ br label %next
+
+next:
+ %s = select i1 %p, i32 %A, i32 %c
+ ret i32 %s
+}
+
+; SMAX(SMAX(x, y), x) -> SMAX(x, y)
+define i32 @test30(i32 %x, i32 %y) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ %cmp5 = icmp sgt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %cond, i32 %x
+ ret i32 %retval
+}
+
+; UMAX(UMAX(x, y), x) -> UMAX(x, y)
+define i32 @test31(i32 %x, i32 %y) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp ugt i32 %x, %y
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ %cmp5 = icmp ugt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %cond, i32 %x
+ ret i32 %retval
+}
+
+; SMIN(SMIN(x, y), x) -> SMIN(x, y)
+define i32 @test32(i32 %x, i32 %y) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ %cond = select i1 %cmp, i32 %y, i32 %x
+ %cmp5 = icmp sgt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %x, i32 %cond
+ ret i32 %retval
+}
+
+; MAX(MIN(x, y), x) -> x
+define i32 @test33(i32 %x, i32 %y) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ %cond = select i1 %cmp, i32 %y, i32 %x
+ %cmp5 = icmp sgt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %cond, i32 %x
+ ret i32 %retval
+}
+
+; MIN(MAX(x, y), x) -> x
+define i32 @test34(i32 %x, i32 %y) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i32 [[X:%.*]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ %cond = select i1 %cmp, i32 %x, i32 %y
+ %cmp5 = icmp sgt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %x, i32 %cond
+ ret i32 %retval
+}
+
+define i1 @test38(i1 %cond) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: ret i1 false
+;
+ %zero = alloca i32
+ %one = alloca i32
+ %ptr = select i1 %cond, i32* %zero, i32* %one
+ %isnull = icmp eq i32* %ptr, null
+ ret i1 %isnull
+}
+
+define i1 @test39(i1 %cond, double %x) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: ret i1 true
+;
+ %s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
+ %cmp = fcmp ule double %x, %s
+ ret i1 %cmp
+}
+
+define i1 @test40(i1 %cond) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: ret i1 false
+;
+ %a = alloca i32
+ %b = alloca i32
+ %c = alloca i32
+ %s = select i1 %cond, i32* %a, i32* %b
+ %r = icmp eq i32* %s, %c
+ ret i1 %r
+}
+
+define i32 @test41(i1 %cond, i32 %x, i32 %y) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[R:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %z = and i32 %x, %y
+ %s = select i1 %cond, i32 %y, i32 %z
+ %r = and i32 %x, %s
+ ret i32 %r
+}
+
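+; select (X == 0), (Y - 1), Y --> Y - zext (X == 0)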
+define i32 @test42(i32 %x, i32 %y) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[COND]] to i32
+; CHECK-NEXT: [[C:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %b = add i32 %y, -1
+ %cond = icmp eq i32 %x, 0
+ %c = select i1 %cond, i32 %b, i32 %y
+ ret i32 %c
+}
+
+define <2 x i32> @test42vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[COND]] to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %b = add <2 x i32> %y, <i32 -1, i32 -1>
+ %cond = icmp eq <2 x i32> %x, zeroinitializer
+ %c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %y
+ ret <2 x i32> %c
+}
+
+; PR8994
+
+; This select instruction can't be eliminated because trying to do so would
+; change the number of vector elements. This used to assert.
+define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[SELECT:%.*]] = select <3 x i1> [[ICMP:%.*]], <3 x i16> zeroinitializer, <3 x i16> [[TMP:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i16> [[SELECT]] to i48
+; CHECK-NEXT: ret i48 [[TMP2]]
+;
+ %select = select <3 x i1> %icmp, <3 x i16> zeroinitializer, <3 x i16> %tmp
+ %tmp2 = bitcast <3 x i16> %select to i48
+ ret i48 %tmp2
+}
+
+; Allow select promotion even if there are multiple uses of bitcasted ops.
+; Hoisting the selects allows later pattern matching to see that these are min/max ops.
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]]
+; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* [[PTR1:%.*]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* [[PTR2:%.*]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %a, %b
+ %bc1 = bitcast <4 x float> %a to <4 x i32>
+ %bc2 = bitcast <4 x float> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1
+ store <4 x i32> %sel2, <4 x i32>* %ptr2
+ ret void
+}
+
+; To avoid potential backend problems, we don't do the same transform for other casts.
+
+define void @truncs_before_selects(<4 x float> %f1, <4 x float> %f2, <4 x i64> %a, <4 x i64> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @truncs_before_selects(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[F1:%.*]], [[F2:%.*]]
+; CHECK-NEXT: [[BC1:%.*]] = trunc <4 x i64> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT: [[BC2:%.*]] = trunc <4 x i64> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SEL1:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC1]], <4 x i32> [[BC2]]
+; CHECK-NEXT: [[SEL2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC2]], <4 x i32> [[BC1]]
+; CHECK-NEXT: store <4 x i32> [[SEL1]], <4 x i32>* [[PTR1:%.*]], align 16
+; CHECK-NEXT: store <4 x i32> [[SEL2]], <4 x i32>* [[PTR2:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %f1, %f2
+ %bc1 = trunc <4 x i64> %a to <4 x i32>
+ %bc2 = trunc <4 x i64> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1, align 16
+ store <4 x i32> %sel2, <4 x i32>* %ptr2, align 16
+ ret void
+}
+
+; PR8575
+
+define i32 @test52(i32 %n, i32 %m) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], [[M:%.*]]
+; CHECK-NEXT: [[STOREMERGE:%.*]] = select i1 [[CMP]], i32 1, i32 6
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %cmp = icmp sgt i32 %n, %m
+ %. = select i1 %cmp, i32 1, i32 3
+ %add = add nsw i32 %., 3
+ %storemerge = select i1 %cmp, i32 %., i32 %add
+ ret i32 %storemerge
+}
+
+; PR9454
+
+define i32 @test53(i32 %x) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2, i32 1
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %and = and i32 %x, 2
+ %cmp = icmp eq i32 %and, %x
+ %sel = select i1 %cmp, i32 2, i32 1
+ ret i32 %sel
+}
+
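+; 'ashr exact' is zero only when X is zero, so the select folds to zext (X != 0).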
+define i32 @test54(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: [[C:%.*]] = zext i1 [[B]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = ashr exact i32 %X, %Y
+ %B = icmp eq i32 %A, 0
+ %C = select i1 %B, i32 %A, i32 1
+ ret i32 %C
+}
+
+define i1 @test55(i1 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = ashr exact i32 %Y, %Z
+ %B = select i1 %X, i32 %Y, i32 %A
+ %C = icmp eq i32 %B, 0
+ ret i1 %C
+}
+
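+; When X is 0 the zext is also 0, so the select is redundant.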
+define i32 @test56(i16 %x) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %tobool = icmp eq i16 %x, 0
+ %conv = zext i16 %x to i32
+ %cond = select i1 %tobool, i32 0, i32 %conv
+ ret i32 %cond
+}
+
+define i32 @test57(i32 %x, i32 %y) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %and = and i32 %x, %y
+ %tobool = icmp eq i32 %x, 0
+ %.and = select i1 %tobool, i32 0, i32 %and
+ ret i32 %.and
+}
+
+define i32 @test58(i16 %x) {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %tobool = icmp ne i16 %x, 1
+ %conv = zext i16 %x to i32
+ %cond = select i1 %tobool, i32 %conv, i32 1
+ ret i32 %cond
+}
+
+define i32 @test59(i32 %x, i32 %y) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %and = and i32 %x, %y
+ %tobool = icmp ne i32 %x, %y
+ %.and = select i1 %tobool, i32 %and, i32 %y
+ ret i32 %.and
+}
+
+define i1 @test60(i32 %x, i1* %y) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[LOAD:%.*]] = load i1, i1* [[Y:%.*]], align 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 [[LOAD]], i1 [[CMP1]]
+; CHECK-NEXT: ret i1 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 0
+ %load = load i1, i1* %y, align 1
+ %cmp1 = icmp slt i32 %x, 1
+ %sel = select i1 %cmp, i1 %load, i1 %cmp1
+ ret i1 %sel
+}
+
+@glbl = constant i32 10
+define i32 @test61(i32* %ptr) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: ret i32 10
+;
+ %A = load i32, i32* %ptr
+ %B = icmp eq i32* %ptr, @glbl
+ %C = select i1 %B, i32 %A, i32 10
+ ret i32 %C
+}
+
+; PR14131
+define void @test64(i32 %p, i16 %b) noreturn {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 undef, label [[LOR_RHS:%.*]], label [[LOR_END:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: br i1 true, label [[COND_END17:%.*]], label [[COND_FALSE16:%.*]]
+; CHECK: cond.false16:
+; CHECK-NEXT: br label [[COND_END17]]
+; CHECK: cond.end17:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: br label [[WHILE_BODY]]
+;
+entry:
+ %p.addr.0.insert.mask = and i32 %p, -65536
+ %conv2 = and i32 %p, 65535
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs:
+ %p.addr.0.extract.trunc = trunc i32 %p.addr.0.insert.mask to i16
+ %phitmp = zext i16 %p.addr.0.extract.trunc to i32
+ br label %lor.end
+
+lor.end:
+ %t.1 = phi i32 [ 0, %entry ], [ %phitmp, %lor.rhs ]
+ %conv6 = zext i16 %b to i32
+ %div = udiv i32 %conv6, %t.1
+ %tobool8 = icmp eq i32 %div, 0
+ %cmp = icmp eq i32 %t.1, 0
+ %cmp12 = icmp ult i32 %conv2, 2
+ %cmp.sink = select i1 %tobool8, i1 %cmp12, i1 %cmp
+ br i1 %cmp.sink, label %cond.end17, label %cond.false16
+
+cond.false16:
+ br label %cond.end17
+
+cond.end17:
+ br label %while.body
+
+while.body:
+ br label %while.body
+}
+
+@under_aligned = external global i32, align 1
+
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferenceable but may have a lower alignment than the
+; load does.
+define i32 @test76(i1 %flag, i32* %x) {
+; CHECK-LABEL: @test76(
+; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
+; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* @under_aligned, i32* [[X]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* @under_aligned, i32* %x
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+declare void @scribble_on_i32(i32*)
+
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferenceable but may have a lower alignment than the
+; load does.
+
+define i32 @test77(i1 %flag, i32* %x) {
+; CHECK-LABEL: @test77(
+; CHECK-NEXT: [[UNDER_ALIGNED:%.*]] = alloca i32, align 1
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[UNDER_ALIGNED]])
+; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
+; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[UNDER_ALIGNED]], i32* [[X]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %under_aligned = alloca i32, align 1
+ call void @scribble_on_i32(i32* %under_aligned)
+ store i32 0, i32* %x
+ %p = select i1 %flag, i32* %under_aligned, i32* %x
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
+; CHECK-NEXT: store i32 0, i32* [[Y:%.*]], align 4
+; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, i32* [[Y]], align 4
+; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i32 [[X_VAL]], i32 [[Y_VAL]]
+; CHECK-NEXT: ret i32 [[V]]
+;
+entry:
+ store i32 0, i32* %x
+ store i32 0, i32* %y
+  ; Block forwarding by storing to %z, which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) %x, i32* dereferenceable(4) %y, i32* %z) {
+; CHECK-LABEL: @test78_deref(
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X:%.*]], align 4
+; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, i32* [[Y:%.*]], align 4
+; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i32 [[X_VAL]], i32 [[Y_VAL]]
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+; The same as @test78 but we can't speculate the load because it can trap
+; if under-aligned.
+define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; CHECK-LABEL: @test78_neg(
+; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
+; CHECK-NEXT: store i32 0, i32* [[Y:%.*]], align 4
+; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
+; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X]], i32* [[Y]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 16
+; CHECK-NEXT: ret i32 [[V]]
+;
+ store i32 0, i32* %x
+ store i32 0, i32* %y
+  ; Block forwarding by storing to %z, which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p, align 16
+ ret i32 %v
+}
+
+; The same as @test78_deref but we can't speculate the load because
+; one of the arguments is not sufficiently dereferenceable.
+define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) {
+; CHECK-LABEL: @test78_deref_neg(
+; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X:%.*]], i32* [[Y:%.*]]
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; CHECK-LABEL: @test79(
+; CHECK-NEXT: [[X1:%.*]] = bitcast float* [[X:%.*]] to i32*
+; CHECK-NEXT: [[Y1:%.*]] = bitcast i32* [[Y:%.*]] to float*
+; CHECK-NEXT: store i32 0, i32* [[X1]], align 4
+; CHECK-NEXT: store i32 0, i32* [[Y]], align 4
+; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
+; CHECK-NEXT: [[X_VAL:%.*]] = load float, float* [[X]], align 4
+; CHECK-NEXT: [[Y1_VAL:%.*]] = load float, float* [[Y1]], align 4
+; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], float [[X_VAL]], float [[Y1_VAL]]
+; CHECK-NEXT: ret float [[V]]
+;
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ store i32 0, i32* %x1
+ store i32 0, i32* %y
+  ; Block forwarding by storing to %z, which could alias either %x or %y.
+ store i32 42, i32* %z
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float, float* %p
+ ret float %v
+}
+
+; Test that when we speculate the loads around the select they fold through
+; load->load folding and load->store folding.
+define i32 @test80(i1 %flag) {
+; CHECK-LABEL: @test80(
+; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[X]])
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
+; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: store i32 [[TMP]], i32* [[Y]], align 4
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %x = alloca i32
+ %y = alloca i32
+ call void @scribble_on_i32(i32* %x)
+ call void @scribble_on_i32(i32* %y)
+ %tmp = load i32, i32* %x
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+define float @test81(i1 %flag) {
+; CHECK-LABEL: @test81(
+; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[X]])
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
+; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: store i32 [[TMP]], i32* [[Y]], align 4
+; CHECK-NEXT: [[V:%.*]] = bitcast i32 [[TMP]] to float
+; CHECK-NEXT: ret float [[V]]
+;
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_i32(i32* %x1)
+ call void @scribble_on_i32(i32* %y)
+ %tmp = load i32, i32* %x1
+ store i32 %tmp, i32* %y
+ %p = select i1 %flag, float* %x, float* %y1
+ %v = load float, float* %p
+ ret float %v
+}
+
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+define i32 @test82(i1 %flag) {
+; CHECK-LABEL: @test82(
+; CHECK-NEXT: [[X:%.*]] = alloca float, align 4
+; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[X1:%.*]] = bitcast float* [[X]] to i32*
+; CHECK-NEXT: [[Y1:%.*]] = bitcast i32* [[Y]] to float*
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[X1]])
+; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
+; CHECK-NEXT: [[TMP:%.*]] = load float, float* [[X]], align 4
+; CHECK-NEXT: store float [[TMP]], float* [[Y1]], align 4
+; CHECK-NEXT: [[V:%.*]] = bitcast float [[TMP]] to i32
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %x = alloca float
+ %y = alloca i32
+ %x1 = bitcast float* %x to i32*
+ %y1 = bitcast i32* %y to float*
+ call void @scribble_on_i32(i32* %x1)
+ call void @scribble_on_i32(i32* %y)
+ %tmp = load float, float* %x
+ store float %tmp, float* %y1
+ %p = select i1 %flag, i32* %x1, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+declare void @scribble_on_i64(i64*)
+declare void @scribble_on_i128(i128*)
+
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers, which requires inttoptr casts.
+define i8* @test83(i1 %flag) {
+; CHECK-LABEL: @test83(
+; CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8
+; CHECK-NEXT: [[Y:%.*]] = alloca i8*, align 8
+; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8** [[Y]] to i64*
+; CHECK-NEXT: [[X1:%.*]] = bitcast i8** [[X]] to i64*
+; CHECK-NEXT: call void @scribble_on_i64(i64* nonnull [[X1]])
+; CHECK-NEXT: call void @scribble_on_i64(i64* nonnull [[TMPCAST]])
+; CHECK-NEXT: [[TMP:%.*]] = load i64, i64* [[X1]], align 8
+; CHECK-NEXT: store i64 [[TMP]], i64* [[TMPCAST]], align 8
+; CHECK-NEXT: [[V:%.*]] = inttoptr i64 [[TMP]] to i8*
+; CHECK-NEXT: ret i8* [[V]]
+;
+ %x = alloca i8*
+ %y = alloca i64
+ %x1 = bitcast i8** %x to i64*
+ %y1 = bitcast i64* %y to i8**
+ call void @scribble_on_i64(i64* %x1)
+ call void @scribble_on_i64(i64* %y)
+ %tmp = load i64, i64* %x1
+ store i64 %tmp, i64* %y
+ %p = select i1 %flag, i8** %x, i8** %y1
+ %v = load i8*, i8** %p
+ ret i8* %v
+}
+
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers, which requires a ptrtoint cast.
+define i64 @test84(i1 %flag) {
+; CHECK-LABEL: @test84(
+; CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8
+; CHECK-NEXT: [[Y:%.*]] = alloca i8*, align 8
+; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8** [[Y]] to i64*
+; CHECK-NEXT: [[X1:%.*]] = bitcast i8** [[X]] to i64*
+; CHECK-NEXT: call void @scribble_on_i64(i64* nonnull [[X1]])
+; CHECK-NEXT: call void @scribble_on_i64(i64* nonnull [[TMPCAST]])
+; CHECK-NEXT: [[TMP:%.*]] = load i8*, i8** [[X]], align 8
+; CHECK-NEXT: store i8* [[TMP]], i8** [[Y]], align 8
+; CHECK-NEXT: [[V:%.*]] = ptrtoint i8* [[TMP]] to i64
+; CHECK-NEXT: ret i64 [[V]]
+;
+ %x = alloca i8*
+ %y = alloca i64
+ %x1 = bitcast i8** %x to i64*
+ %y1 = bitcast i64* %y to i8**
+ call void @scribble_on_i64(i64* %x1)
+ call void @scribble_on_i64(i64* %y)
+ %tmp = load i8*, i8** %x
+ store i8* %tmp, i8** %y1
+ %p = select i1 %flag, i64* %x1, i64* %y
+ %v = load i64, i64* %p
+ ret i64 %v
+}
+
+; Test that we can't speculate the load around the select. The load of the
+; pointer doesn't load all of the stored integer bits. We could fix this, but it
+; would require endianness checks and other nastiness.
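+; (Illustrative note: the i128 covers all 16 bytes of the alloca, but the i8*
+; slot is only the first 8 bytes, so which half of the stored integer that slot
+; aliases depends on the target's byte order.)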
+define i8* @test85(i1 %flag) {
+; CHECK-LABEL: @test85(
+; CHECK-NEXT: [[X1:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT: [[Y:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[X1_SUB:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[X1]], i64 0, i64 0
+; CHECK-NEXT: [[X2:%.*]] = bitcast [2 x i8*]* [[X1]] to i128*
+; CHECK-NEXT: [[Y1:%.*]] = bitcast i128* [[Y]] to i8**
+; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[X2]])
+; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[Y]])
+; CHECK-NEXT: [[TMP:%.*]] = load i128, i128* [[X2]], align 8
+; CHECK-NEXT: store i128 [[TMP]], i128* [[Y]], align 8
+; CHECK-NEXT: [[X1_SUB_VAL:%.*]] = load i8*, i8** [[X1_SUB]], align 8
+; CHECK-NEXT: [[Y1_VAL:%.*]] = load i8*, i8** [[Y1]], align 8
+; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i8* [[X1_SUB_VAL]], i8* [[Y1_VAL]]
+; CHECK-NEXT: ret i8* [[V]]
+;
+ %x = alloca [2 x i8*]
+ %y = alloca i128
+ %x1 = bitcast [2 x i8*]* %x to i8**
+ %x2 = bitcast i8** %x1 to i128*
+ %y1 = bitcast i128* %y to i8**
+ call void @scribble_on_i128(i128* %x2)
+ call void @scribble_on_i128(i128* %y)
+ %tmp = load i128, i128* %x2
+ store i128 %tmp, i128* %y
+ %p = select i1 %flag, i8** %x1, i8** %y1
+ %v = load i8*, i8** %p
+ ret i8* %v
+}
+
+; Test that we can't speculate the load around the select when the integer size
+; is larger than the pointer size. The store of the pointer doesn't store to all
+; the bits of the integer.
+define i128 @test86(i1 %flag) {
+; CHECK-LABEL: @test86(
+; CHECK-NEXT: [[X1:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT: [[Y:%.*]] = alloca i128, align 8
+; CHECK-NEXT: [[X1_SUB:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[X1]], i64 0, i64 0
+; CHECK-NEXT: [[X2:%.*]] = bitcast [2 x i8*]* [[X1]] to i128*
+; CHECK-NEXT: [[Y1:%.*]] = bitcast i128* [[Y]] to i8**
+; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[X2]])
+; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[Y]])
+; CHECK-NEXT: [[TMP:%.*]] = load i8*, i8** [[X1_SUB]], align 8
+; CHECK-NEXT: store i8* [[TMP]], i8** [[Y1]], align 8
+; CHECK-NEXT: [[X2_VAL:%.*]] = load i128, i128* [[X2]], align 8
+; CHECK-NEXT: [[Y_VAL:%.*]] = load i128, i128* [[Y]], align 8
+; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i128 [[X2_VAL]], i128 [[Y_VAL]]
+; CHECK-NEXT: ret i128 [[V]]
+;
+ %x = alloca [2 x i8*]
+ %y = alloca i128
+ %x1 = bitcast [2 x i8*]* %x to i8**
+ %x2 = bitcast i8** %x1 to i128*
+ %y1 = bitcast i128* %y to i8**
+ call void @scribble_on_i128(i128* %x2)
+ call void @scribble_on_i128(i128* %y)
+ %tmp = load i8*, i8** %x1
+ store i8* %tmp, i8** %y1
+ %p = select i1 %flag, i128* %x2, i128* %y
+ %v = load i128, i128* %p
+ ret i128 %v
+}
+
+define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
+; CHECK-LABEL: @test_select_select0(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 [[R1:%.*]], i32 [[R0:%.*]]
+; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A]], [[V2:%.*]]
+; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[S0]], i32 [[R1]]
+; CHECK-NEXT: ret i32 [[S1]]
+;
+ %c0 = icmp sge i32 %a, %v1
+ %s0 = select i1 %c0, i32 %r0, i32 %r1
+ %c1 = icmp slt i32 %a, %v2
+ %s1 = select i1 %c1, i32 %s0, i32 %r1
+ ret i32 %s1
+}
+
+define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
+; CHECK-LABEL: @test_select_select1(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 [[R1:%.*]], i32 [[R0:%.*]]
+; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A]], [[V2:%.*]]
+; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[R0]], i32 [[S0]]
+; CHECK-NEXT: ret i32 [[S1]]
+;
+ %c0 = icmp sge i32 %a, %v1
+ %s0 = select i1 %c0, i32 %r0, i32 %r1
+ %c1 = icmp slt i32 %a, %v2
+ %s1 = select i1 %c1, i32 %r0, i32 %s0
+ ret i32 %s1
+}
+
+define i32 @PR23757(i32 %x) {
+; CHECK-LABEL: @PR23757(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 2147483647
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp eq i32 %x, 2147483647
+ %add = add nsw i32 %x, 1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %add
+ ret i32 %sel
+}
+
+; max(max(~a, -1), -1) --> ~min(a, 0)
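+; (Illustrative sketch, not checked IR: with a = -5, ~a = 4 and both maxes keep 4,
+; while ~min(-5, 0) = ~(-5) = 4; with a = 3, ~a = -4, both maxes give -1, and
+; ~min(3, 0) = ~0 = -1.)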
+
+define i32 @PR27137(i32 %a) {
+; CHECK-LABEL: @PR27137(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0
+; CHECK-NEXT: [[S1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[S1]]
+;
+ %not_a = xor i32 %a, -1
+ %c0 = icmp slt i32 %a, 0
+ %s0 = select i1 %c0, i32 %not_a, i32 -1
+ %c1 = icmp sgt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
+
+define i32 @select_icmp_slt0_xor(i32 %x) {
+; CHECK-LABEL: @select_icmp_slt0_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %cmp = icmp slt i32 %x, zeroinitializer
+ %xor = xor i32 %x, 2147483648
+ %x.xor = select i1 %cmp, i32 %x, i32 %xor
+ ret i32 %x.xor
+}
+
+define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) {
+; CHECK-LABEL: @select_icmp_slt0_xor_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %cmp = icmp slt <2 x i32> %x, zeroinitializer
+ %xor = xor <2 x i32> %x, <i32 2147483648, i32 2147483648>
+ %x.xor = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %xor
+ ret <2 x i32> %x.xor
+}
+
+define <4 x i32> @canonicalize_to_shuffle(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @canonicalize_to_shuffle(
+; CHECK-NEXT: [[SEL:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[SEL]]
+;
+ %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+; Undef elements of the select condition may not be translated into undef elements of a shuffle mask
+; because undef in a shuffle mask means we can return anything, not just one of the selected values.
+; https://bugs.llvm.org/show_bug.cgi?id=32486
+
+define <4 x i32> @undef_elts_in_condition(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @undef_elts_in_condition(
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[SEL]]
+;
+ %sel = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+; Don't crash, and don't try the transform, if the condition mask is a constant expression or contains a constant expression.
+
+@g = global i32 0
+
+define <4 x i32> @cannot_canonicalize_to_shuffle1(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cannot_canonicalize_to_shuffle1(
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[SEL]]
+;
+ %sel = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @cannot_canonicalize_to_shuffle2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cannot_canonicalize_to_shuffle2(
+; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> <i1 true, i1 undef, i1 false, i1 icmp sle (i16 ptrtoint (i32* @g to i16), i16 4)>, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[SEL]]
+;
+ %sel = select <4 x i1> <i1 true, i1 undef, i1 false, i1 icmp sle (i16 ptrtoint (i32* @g to i16), i16 4)>, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+declare void @llvm.assume(i1)
+
+define i8 @assume_cond_true(i1 %cond, i8 %x, i8 %y) {
+; CHECK-LABEL: @assume_cond_true(
+; CHECK-NEXT: call void @llvm.assume(i1 [[COND:%.*]])
+; CHECK-NEXT: ret i8 [[X:%.*]]
+;
+ call void @llvm.assume(i1 %cond)
+ %sel = select i1 %cond, i8 %x, i8 %y
+ ret i8 %sel
+}
+
+; computeKnownBitsFromAssume() understands the 'not' of an assumed condition.
+
+define i8 @assume_cond_false(i1 %cond, i8 %x, i8 %y) {
+; CHECK-LABEL: @assume_cond_false(
+; CHECK-NEXT: [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT: call void @llvm.assume(i1 [[NOTCOND]])
+; CHECK-NEXT: ret i8 [[Y:%.*]]
+;
+ %notcond = xor i1 %cond, true
+ call void @llvm.assume(i1 %notcond)
+ %sel = select i1 %cond, i8 %x, i8 %y
+ ret i8 %sel
+}
+
+; Test case to make sure we don't treat all-ones float values as a reason to convert the select into a sext.
+define <4 x float> @PR33721(<4 x float> %w) {
+; CHECK-LABEL: @PR33721(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[W:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+entry:
+ %0 = fcmp ole <4 x float> %w, zeroinitializer
+ %1 = select <4 x i1> %0, <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
+ ret <4 x float> %1
+}
+
+; select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
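+; (Informal justification: when C is false the outer select returns Z regardless
+; of the binop, so the inner select's Y arm is dead and the binop may use X
+; directly.)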
+define i8 @test87(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @test87(
+; CHECK-NEXT: [[B:%.*]] = add i8 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[B]], i8 [[Z:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = select i1 %cond, i8 %x, i8 %y
+ %b = add i8 %a, %w
+ %c = select i1 %cond, i8 %b, i8 %z
+ ret i8 %c
+}
+
+; select(C, binop(select(C, X, Y), W), Z) -> select(C, Z, binop(Y, W))
+define i8 @test88(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @test88(
+; CHECK-NEXT: [[B:%.*]] = sub i8 [[Y:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[B]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = select i1 %cond, i8 %x, i8 %y
+ %b = sub i8 %a, %w
+ %c = select i1 %cond, i8 %z, i8 %b
+ ret i8 %c
+}
+
+; select(C, Z, binop(W, select(C, X, Y))) -> select(C, binop(X, W), Z)
+define i8 @test89(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @test89(
+; CHECK-NEXT: [[B:%.*]] = and i8 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[B]], i8 [[Z:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = select i1 %cond, i8 %x, i8 %y
+ %b = and i8 %w, %a
+ %c = select i1 %cond, i8 %b, i8 %z
+ ret i8 %c
+}
+
+; select(C, Z, binop(W, select(C, X, Y))) -> select(C, Z, binop(W, Y))
+define i8 @test90(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @test90(
+; CHECK-NEXT: [[B:%.*]] = or i8 [[Y:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[B]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = select i1 %cond, i8 %x, i8 %y
+ %b = or i8 %w, %a
+ %c = select i1 %cond, i8 %z, i8 %b
+ ret i8 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/select_arithmetic.ll b/llvm/test/Transforms/InstCombine/select_arithmetic.ll
new file mode 100644
index 00000000000..642fa6ccc85
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select_arithmetic.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Tests folding constants from two similar selects that feed an add
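+; (Sketch of the expected fold, not checked IR: the true arms add to 5 + 1 = 6 and
+; the false arms add to 6 + 9 = 15, so the fadd collapses into one select.)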
+
+define float @test1a(i1 zeroext %arg) #0 {
+; CHECK-LABEL: @test1a(
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ARG:%.*]], float 6.000000e+00, float 1.500000e+01
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+ %tmp1 = select i1 %arg, float 1.000000e+00, float 9.000000e+00
+ %tmp2 = fadd float %tmp, %tmp1
+ ret float %tmp2
+}
+
+; Tests folding constants from multiple similar selects that feed a chain of adds
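+; (Sketch of the arithmetic, not checked IR: true arms 5 + 1 = 6, 0.25 + 1 = 1.25,
+; 1.25 + 6 = 7.25; false arms 6 + 9 = 15, 4 + 9 = 13, 13 + 15 = 28.)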
+
+define float @test1b(i1 zeroext %arg) #0 {
+; CHECK-LABEL: @test1b(
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[ARG:%.*]], float 7.250000e+00, float 2.800000e+01
+; CHECK-NEXT: ret float [[TMP5]]
+;
+ %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+ %tmp1 = select i1 %arg, float 1.000000e+00, float 9.000000e+00
+ %tmp2 = select i1 %arg, float 2.500000e-01, float 4.000000e+00
+ %tmp3 = fadd float %tmp, %tmp1
+ %tmp4 = fadd float %tmp2, %tmp1
+ %tmp5 = fadd float %tmp4, %tmp3
+ ret float %tmp5
+}
+
+; Tests folding constants from two similar selects that feed a sub
+
+define float @test2(i1 zeroext %arg) #0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ARG:%.*]], float 4.000000e+00, float -3.000000e+00
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+ %tmp1 = select i1 %arg, float 1.000000e+00, float 9.000000e+00
+ %tmp2 = fsub float %tmp, %tmp1
+ ret float %tmp2
+}
+
+; Tests folding constants from two similar selects that feed a mul
+
+define float @test3(i1 zeroext %arg) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ARG:%.*]], float 5.000000e+00, float 5.400000e+01
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+ %tmp1 = select i1 %arg, float 1.000000e+00, float 9.000000e+00
+ %tmp2 = fmul float %tmp, %tmp1
+ ret float %tmp2
+}
+
+declare void @use_float(float)
+
+; Tests folding constants when the selects have multiple uses but the binary op
+; can still be folded away into a single select.
+
+define float @test4(i1 zeroext %arg) #0 {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP:%.*]] = select i1 [[ARG:%.*]], float 5.000000e+00, float 6.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[ARG]], float 5.000000e+00, float 5.400000e+01
+; CHECK-NEXT: call void @use_float(float [[TMP]])
+; CHECK-NEXT: ret float [[TMP2]]
+;
+ %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
+ %tmp1 = select i1 %arg, float 1.000000e+00, float 9.000000e+00
+ %tmp2 = fmul float %tmp, %tmp1
+ call void @use_float(float %tmp)
+ ret float %tmp2
+}
+
+; Tests not folding constants if we cannot fold away any of the selects.
+
+define float @test5(i1 zeroext %arg, float %div) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP:%.*]] = select i1 [[ARG:%.*]], float [[DIV:%.*]], float 5.000000e+00
+; CHECK-NEXT: [[MUL:%.*]] = fmul contract float [[TMP]], [[TMP]]
+; CHECK-NEXT: call void @use_float(float [[TMP]])
+; CHECK-NEXT: ret float [[MUL]]
+;
+ %tmp = select i1 %arg, float %div, float 5.000000e+00
+ %mul = fmul contract float %tmp, %tmp
+ call void @use_float(float %tmp)
+ ret float %mul
+}
+
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
new file mode 100644
index 00000000000..67dd246c040
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; and enhanced to include metadata checking.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @foo(i32) local_unnamed_addr #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 %0, 2
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[TMP2]], i32 20, i32 -20, !prof ![[$MD1:[0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[DOTV]], %0
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %2 = icmp sgt i32 %0, 2
+ %3 = add nsw i32 %0, 20
+ %4 = add i32 %0, -20
+ select i1 %2, i32 %3, i32 %4, !prof !1
+ ret i32 %5
+}
+
+define i8 @shrink_select(i1 %cond, i32 %x) {
+; CHECK-LABEL: @shrink_select(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i8
+; CHECK-NEXT: [[TRUNC:%.*]] = select i1 %cond, i8 [[TMP1]], i8 42, !prof ![[$MD1]]
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
+ %sel = select i1 %cond, i32 %x, i32 42, !prof !1
+ %trunc = trunc i32 %sel to i8
+ ret i8 %trunc
+}
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %a, %b
+; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b, !prof ![[$MD1]]
+; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a, !prof ![[$MD1]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %a, %b
+ %bc1 = bitcast <4 x float> %a to <4 x i32>
+ %bc2 = bitcast <4 x float> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2, !prof !1
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1, !prof !1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1
+ store <4 x i32> %sel2, <4 x i32>* %ptr2
+ ret void
+}
+
+define i32 @foo2(i32, i32) local_unnamed_addr #0 {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 %0, 2
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, %1
+; CHECK-NEXT: [[DOTP:%.*]] = select i1 [[TMP3]], i32 %1, i32 [[TMP4]], !prof ![[$MD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[DOTP]], %0
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = icmp sgt i32 %0, 2
+ %4 = add nsw i32 %0, %1
+ %5 = sub nsw i32 %0, %1
+ select i1 %3, i32 %4, i32 %5, !prof !1
+ ret i32 %6
+}
+
+define i64 @test43(i32 %a) nounwind {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A_EXT]], 0
+; CHECK-NEXT: [[MAX:%.*]] = select i1 [[TMP1]], i64 [[A_EXT]], i64 0, !prof ![[$MD1]]
+; CHECK-NEXT: ret i64 [[MAX]]
+;
+ %a_ext = sext i32 %a to i64
+ %is_a_nonnegative = icmp sgt i32 %a, -1
+ %max = select i1 %is_a_nonnegative, i64 %a_ext, i64 0, !prof !1
+ ret i64 %max
+}
+
+define <2 x i32> @scalar_select_of_vectors_sext(<2 x i1> %cca, i1 %ccb) {
+; CHECK-LABEL: @scalar_select_of_vectors_sext(
+; CHECK-NEXT: [[NARROW:%.*]] = select i1 %ccb, <2 x i1> %cca, <2 x i1> zeroinitializer, !prof ![[$MD1]]
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[NARROW]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ccax = sext <2 x i1> %cca to <2 x i32>
+ %r = select i1 %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0>, !prof !1
+ ret <2 x i32> %r
+}
+
+
+define i16 @t7(i32 %a) {
+; CHECK-LABEL: @t7(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 %a, -32768
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 %a, i32 -32768, !prof ![[$MD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT: ret i16 [[TMP3]]
+;
+ %1 = icmp slt i32 %a, -32768
+ %2 = trunc i32 %a to i16
+ %3 = select i1 %1, i16 %2, i16 -32768, !prof !1
+ ret i16 %3
+}
+
+define i32 @abs_nabs_x01(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x01(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %x, 0
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, %x
+; CHECK-NEXT: [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 %x, !prof ![[$MD3:[0-9]+]]
+; CHECK-NEXT: ret i32 [[COND1]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sub = sub nsw i32 0, %x
+ %cond = select i1 %cmp, i32 %sub, i32 %x, !prof !1
+ %cmp1 = icmp sgt i32 %cond, -1
+ %sub16 = sub nsw i32 0, %cond
+ %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16, !prof !2
+ ret i32 %cond18
+}
+
+; Swap predicate / metadata order
+
+define <2 x i32> @abs_nabs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_nabs_x01_vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> %x, zeroinitializer
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, %x
+; CHECK-NEXT: [[COND1:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> %x, !prof ![[$MD3]]
+; CHECK-NEXT: ret <2 x i32> [[COND1]]
+;
+ %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+ %sub = sub nsw <2 x i32> zeroinitializer, %x
+ %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x, !prof !1
+ %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+ %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+ %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16, !prof !2
+ ret <2 x i32> %cond18
+}
+
+; SMAX(SMAX(x, y), x) -> SMAX(x, y)
+define i32 @test30(i32 %x, i32 %y) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 %x, i32 %y, !prof ![[$MD1]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ %cond = select i1 %cmp, i32 %x, i32 %y, !prof !1
+ %cmp5 = icmp sgt i32 %cond, %x
+ %retval = select i1 %cmp5, i32 %cond, i32 %x, !prof !2
+ ret i32 %retval
+}
+
+; SMAX(SMAX(75, X), 36) -> SMAX(X, 75)
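+; (Informal justification: the inner max is always at least 75 > 36, so the outer
+; clamp to 36 can never change the result.)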
+define i32 @test70(i32 %x) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 %x, 75
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP1]], i32 %x, i32 75, !prof ![[$MD3]]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp = icmp slt i32 %x, 75
+ %cond = select i1 %cmp, i32 75, i32 %x, !prof !1
+ %cmp3 = icmp slt i32 %cond, 36
+ %retval = select i1 %cmp3, i32 36, i32 %cond, !prof !2
+ ret i32 %retval
+}
+
+; Swap predicate / metadata order
+; SMIN(SMIN(X, 92), 11) -> SMIN(X, 11)
+define i32 @test72(i32 %x) {
+; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 %x, 11
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 %x, i32 11, !prof ![[$MD4:[0-9]+]]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp sgt i32 %x, 92
+ %cond = select i1 %cmp, i32 92, i32 %x, !prof !1
+ %cmp3 = icmp sgt i32 %cond, 11
+ %retval = select i1 %cmp3, i32 11, i32 %cond, !prof !2
+ ret i32 %retval
+}
+
+; Swap predicate / metadata order
+; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
+define i32 @test74(i32 %x) {
+; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 %x, 75
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP1]], i32 %x, i32 75, !prof ![[$MD4]]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %cmp = icmp slt i32 %x, 36
+ %cond = select i1 %cmp, i32 36, i32 %x, !prof !1
+ %cmp3 = icmp slt i32 %cond, 75
+ %retval = select i1 %cmp3, i32 75, i32 %cond, !prof !2
+ ret i32 %retval
+}
+
+; The xor is moved after the select. The metadata remains the same because the select operands are not swapped, only inverted.
+define i32 @smin1(i32 %x) {
+; CHECK-LABEL: @smin1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof ![[$MD1]]
+; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %not_x = xor i32 %x, -1
+ %cmp = icmp sgt i32 %x, 0
+ %sel = select i1 %cmp, i32 %not_x, i32 -1, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, and the metadata is swapped because the select operands are swapped and inverted.
+define i32 @smin2(i32 %x) {
+; CHECK-LABEL: @smin2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof ![[$MD3]]
+; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %not_x = xor i32 %x, -1
+ %cmp = icmp slt i32 %x, 0
+ %sel = select i1 %cmp, i32 -1, i32 %not_x, !prof !1
+ ret i32 %sel
+}
+
+; The xor is moved after the select. The metadata remains the same because the select operands are not swapped, only inverted.
+define i32 @smax1(i32 %x) {
+; CHECK-LABEL: @smax1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof ![[$MD1]]
+; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %not_x = xor i32 %x, -1
+ %cmp = icmp slt i32 %x, 0
+ %sel = select i1 %cmp, i32 %not_x, i32 -1, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, and the metadata is swapped because the select operands are swapped and inverted.
+define i32 @smax2(i32 %x) {
+; CHECK-LABEL: @smax2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0, !prof ![[$MD3]]
+; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %not_x = xor i32 %x, -1
+ %cmp = icmp sgt i32 %x, 0
+ %sel = select i1 %cmp, i32 -1, i32 %not_x, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, but the metadata remains the same because the select operands are not swapped.
+define i32 @umin1(i32 %x) {
+; CHECK-LABEL: @umin1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %x, -2147483648
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648, !prof ![[$MD1]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sel = select i1 %cmp, i32 %x, i32 -2147483648, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, and the metadata is swapped because the select operands are swapped.
+define i32 @umin2(i32 %x) {
+; CHECK-LABEL: @umin2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %x, 2147483647
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647, !prof ![[$MD3]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sel = select i1 %cmp, i32 2147483647, i32 %x, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, but the metadata remains the same because the select operands are not swapped.
+define i32 @umax1(i32 %x) {
+; CHECK-LABEL: @umax1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %x, 2147483647
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647, !prof ![[$MD1]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp slt i32 %x, 0
+ %sel = select i1 %cmp, i32 %x, i32 2147483647, !prof !1
+ ret i32 %sel
+}
+
+; The compare should change, and the metadata is swapped because the select operands are swapped.
+define i32 @umax2(i32 %x) {
+; CHECK-LABEL: @umax2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %x, -2147483648
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648, !prof ![[$MD3]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %cmp = icmp sgt i32 %x, -1
+ %sel = select i1 %cmp, i32 -2147483648, i32 %x, !prof !1
+ ret i32 %sel
+}
+
+; The condition is inverted, and the select ops are swapped. The metadata should be swapped.
+
+define i32 @not_cond(i1 %c, i32 %tv, i32 %fv) {
+; CHECK-LABEL: @not_cond(
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C:%.*]], i32 [[FV:%.*]], i32 [[TV:%.*]], !prof ![[$MD3]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %notc = xor i1 %c, true
+ %r = select i1 %notc, i32 %tv, i32 %fv, !prof !1
+ ret i32 %r
+}
+
+; The condition is inverted, and the select ops are swapped. The metadata should be swapped.
+
+define <2 x i32> @not_cond_vec(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) {
+; CHECK-LABEL: @not_cond_vec(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof ![[$MD3]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %notc = xor <2 x i1> %c, <i1 true, i1 true>
+ %r = select <2 x i1> %notc, <2 x i32> %tv, <2 x i32> %fv, !prof !1
+ ret <2 x i32> %r
+}
+
+; Should match vector 'not' with undef element.
+; The condition is inverted, and the select ops are swapped. The metadata should be swapped.
+
+define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) {
+; CHECK-LABEL: @not_cond_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof ![[$MD3]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %notc = xor <2 x i1> %c, <i1 undef, i1 true>
+ %r = select <2 x i1> %notc, <2 x i32> %tv, <2 x i32> %fv, !prof !1
+ ret <2 x i32> %r
+}
+
+
+!1 = !{!"branch_weights", i32 2, i32 10}
+!2 = !{!"branch_weights", i32 3, i32 10}
+
+; CHECK-DAG: ![[$MD1]] = !{!"branch_weights", i32 2, i32 10}
+; CHECK-DAG: ![[$MD3]] = !{!"branch_weights", i32 10, i32 2}
+; CHECK-DAG: ![[$MD4]] = !{!"branch_weights", i32 10, i32 3}
+
diff --git a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll
new file mode 100644
index 00000000000..99ec4509838
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+
+; Pattern:
+; (1 << NBits) - 1
+; Should be transformed into:
+; ~(-(1 << NBits))
+; The `not` may end up being folded into `and`
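+; (Illustrative example: with NBits = 3, (1 << 3) - 1 = 7 = 0b0111, while
+; -(1 << 3) = -8 = ...11111000 and ~(-8) = 7, so both forms produce the same mask.)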
+
+; ============================================================================ ;
+; Most basic positive tests
+; ============================================================================ ;
+
+; No no-wrap tags on shl
+
+define i32 @shl_add(i32 %NBits) {
+; CHECK-LABEL: @shl_add(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_add_nsw(i32 %NBits) {
+; CHECK-LABEL: @shl_add_nsw(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_add_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_add_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add nuw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_add_nsw_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_add_nsw_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add nuw nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+; shl is nsw
+
+define i32 @shl_nsw_add(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_add(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nsw i32 1, %NBits
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_add_nsw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_add_nsw(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nsw i32 1, %NBits
+ %ret = add nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_add_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_add_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nsw i32 1, %NBits
+ %ret = add nuw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_add_nsw_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_add_nsw_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nsw i32 1, %NBits
+ %ret = add nuw nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+; shl is nuw
+
+define i32 @shl_nuw_add(i32 %NBits) {
+; CHECK-LABEL: @shl_nuw_add(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nuw i32 1, %NBits
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nuw_add_nsw(i32 %NBits) {
+; CHECK-LABEL: @shl_nuw_add_nsw(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nuw i32 1, %NBits
+ %ret = add nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nuw_add_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nuw_add_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nuw i32 1, %NBits
+ %ret = add nuw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nuw_add_nsw_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nuw_add_nsw_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nuw i32 1, %NBits
+ %ret = add nuw nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+; shl is nuw nsw
+
+define i32 @shl_nsw_nuw_add(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_nuw_add(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nuw nsw i32 1, %NBits
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_nuw_add_nsw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_nuw_add_nsw(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor i32 [[NOTMASK]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl nuw nsw i32 1, %NBits
+ %ret = add nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_nuw_add_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_nuw_add_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nuw nsw i32 1, %NBits
+ %ret = add nuw i32 %setbit, -1
+ ret i32 %ret
+}
+
+define i32 @shl_nsw_nuw_add_nsw_nuw(i32 %NBits) {
+; CHECK-LABEL: @shl_nsw_nuw_add_nsw_nuw(
+; CHECK-NEXT: ret i32 -1
+;
+ %setbit = shl nuw nsw i32 1, %NBits
+ %ret = add nuw nsw i32 %setbit, -1
+ ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vectors
+; ============================================================================ ;
+
+define <2 x i32> @shl_add_vec(<2 x i32> %NBits) {
+; CHECK-LABEL: @shl_add_vec(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <2 x i32> <i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %setbit = shl <2 x i32> <i32 1, i32 1>, %NBits
+ %ret = add <2 x i32> %setbit, <i32 -1, i32 -1>
+ ret <2 x i32> %ret
+}
+
+define <3 x i32> @shl_add_vec_undef0(<3 x i32> %NBits) {
+; CHECK-LABEL: @shl_add_vec_undef0(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %setbit = shl <3 x i32> <i32 1, i32 undef, i32 1>, %NBits
+ %ret = add <3 x i32> %setbit, <i32 -1, i32 -1, i32 -1>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @shl_add_vec_undef1(<3 x i32> %NBits) {
+; CHECK-LABEL: @shl_add_vec_undef1(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %setbit = shl <3 x i32> <i32 1, i32 1, i32 1>, %NBits
+ %ret = add <3 x i32> %setbit, <i32 -1, i32 undef, i32 -1>
+ ret <3 x i32> %ret
+}
+
+define <3 x i32> @shl_add_vec_undef2(<3 x i32> %NBits) {
+; CHECK-LABEL: @shl_add_vec_undef2(
+; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: ret <3 x i32> [[RET]]
+;
+ %setbit = shl <3 x i32> <i32 1, i32 undef, i32 1>, %NBits
+ %ret = add <3 x i32> %setbit, <i32 -1, i32 undef, i32 -1>
+ ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+declare void @use32(i32)
+
+; The shl that builds the mask has an extra use, so the fold must not happen.
+define i32 @bad_oneuse0(i32 %NBits) {
+; CHECK-LABEL: @bad_oneuse0(
+; CHECK-NEXT: [[SETBIT:%.*]] = shl i32 1, [[NBITS:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[SETBIT]])
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[SETBIT]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ call void @use32(i32 %setbit)
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+; The shift base is not the `1` constant
+
+define i32 @bad_shl(i32 %base, i32 %NBits) {
+; CHECK-LABEL: @bad_shl(
+; CHECK-NEXT: [[SETBIT:%.*]] = shl i32 [[BASE:%.*]], [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[SETBIT]], -1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 %base, %NBits ; %base instead of 1
+ %ret = add i32 %setbit, -1
+ ret i32 %ret
+}
+
+; The second `add` operand is not the `-1` constant
+
+define i32 @bad_add0(i32 %NBits, i32 %addop2) {
+; CHECK-LABEL: @bad_add0(
+; CHECK-NEXT: [[SETBIT:%.*]] = shl i32 1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[SETBIT]], [[ADDOP2:%.*]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add i32 %setbit, %addop2
+ ret i32 %ret
+}
+
+; Bad add constant
+
+define i32 @bad_add1(i32 %NBits) {
+; CHECK-LABEL: @bad_add1(
+; CHECK-NEXT: [[SETBIT:%.*]] = shl i32 1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[SETBIT]], 1
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add i32 %setbit, 1 ; not -1
+ ret i32 %ret
+}
+
+define i32 @bad_add2(i32 %NBits) {
+; CHECK-LABEL: @bad_add2(
+; CHECK-NEXT: [[SETBIT:%.*]] = shl i32 1, [[NBITS:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = add i32 [[SETBIT]], -2
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %setbit = shl i32 1, %NBits
+ %ret = add i32 %setbit, -2 ; not -1
+ ret i32 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/set.ll b/llvm/test/Transforms/InstCombine/set.ll
new file mode 100644
index 00000000000..b8c349aaf94
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/set.ll
@@ -0,0 +1,392 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that all icmp instructions are eliminated.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@X = external global i32
+
+define i1 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp eq i32 %A, %A
+ ; Never true
+ %C = icmp eq i32* @X, null
+ %D = and i1 %B, %C
+ ret i1 %D
+}
+
+define i1 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp ne i32 %A, %A
+ ; Never false
+ %C = icmp ne i32* @X, null
+ %D = or i1 %B, %C
+ ret i1 %D
+}
+
+define i1 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp slt i32 %A, %A
+ ret i1 %B
+}
+
+
+define i1 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp sgt i32 %A, %A
+ ret i1 %B
+}
+
+define i1 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp sle i32 %A, %A
+ ret i1 %B
+}
+
+define i1 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp sge i32 %A, %A
+ ret i1 %B
+}
+
+define i1 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp uge i32 %A, 0
+ ret i1 %B
+}
+
+define i1 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ult i32 %A, 0
+ ret i1 %B
+}
+
+;; Test operations on boolean values. These should all be eliminated.
+define i1 @test9(i1 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ult i1 %A, false
+ ret i1 %B
+}
+
+define i1 @test10(i1 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ugt i1 %A, true
+ ret i1 %B
+}
+
+define i1 @test11(i1 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp ule i1 %A, true
+ ret i1 %B
+}
+
+define i1 @test12(i1 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp uge i1 %A, false
+ ret i1 %B
+}
+
+define i1 @test13(i1 %A, i1 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[B:%.*]], true
+; CHECK-NEXT: [[C:%.*]] = or i1 [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %C = icmp uge i1 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @test13vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test13vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[B:%.*]], <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %C = icmp uge <2 x i1> %A, %B
+ ret <2 x i1> %C
+}
+
+define i1 @test14(i1 %A, i1 %B) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[C:%.*]] = xor i1 [[TMP1]], true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %C = icmp eq i1 %A, %B
+ ret i1 %C
+}
+
+define <3 x i1> @test14vec(<3 x i1> %A, <3 x i1> %B) {
+; CHECK-LABEL: @test14vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <3 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[TMP1]], <i1 true, i1 true, i1 true>
+; CHECK-NEXT: ret <3 x i1> [[C]]
+;
+ %C = icmp eq <3 x i1> %A, %B
+ ret <3 x i1> %C
+}
+
+define i1 @bool_eq0(i64 %a) {
+; CHECK-LABEL: @bool_eq0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %b = icmp sgt i64 %a, 0
+ %c = icmp eq i64 %a, 1
+ %notc = icmp eq i1 %c, false
+ %and = and i1 %b, %notc
+ ret i1 %and
+}
+
+; This is equivalent to the previous test.
+
+define i1 @xor_of_icmps(i64 %a) {
+; CHECK-LABEL: @xor_of_icmps(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %b = icmp sgt i64 %a, 0
+ %c = icmp eq i64 %a, 1
+ %xor = xor i1 %c, %b
+ ret i1 %xor
+}
+
+; This is also equivalent to the previous test.
+
+define i1 @xor_of_icmps_commute(i64 %a) {
+; CHECK-LABEL: @xor_of_icmps_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %b = icmp sgt i64 %a, 0
+ %c = icmp eq i64 %a, 1
+ %xor = xor i1 %b, %c
+ ret i1 %xor
+}
+
+; FIXME: This is (a != 5).
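+; (Both compares are true exactly when a == 5, and at least one of them is always
+; true, so the xor is true iff a != 5.)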
+
+define i1 @xor_of_icmps_folds_more(i64 %a) {
+; CHECK-LABEL: @xor_of_icmps_folds_more(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i64 [[A:%.*]], 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[A]], 6
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[B]], [[C]]
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %b = icmp sgt i64 %a, 4
+ %c = icmp slt i64 %a, 6
+ %xor = xor i1 %b, %c
+ ret i1 %xor
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=2844
+
+define i32 @PR2844(i32 %x) {
+; CHECK-LABEL: @PR2844(
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 [[X:%.*]], 0
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 [[X]], -638208502
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %A = icmp eq i32 %x, 0
+ %B = icmp slt i32 %x, -638208501
+ %or = or i1 %A, %B
+ %sel = select i1 %or, i32 0, i32 1
+ ret i32 %sel
+}
+
+define i1 @test16(i32 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i32 %A, 5
+ ; Is never true
+ %C = icmp eq i32 %B, 8
+ ret i1 %C
+}
+
+define i1 @test17(i8 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: ret i1 false
+;
+ %B = or i8 %A, 1
+ ; Always false
+ %C = icmp eq i8 %B, 2
+ ret i1 %C
+}
+
+define i1 @test18(i1 %C, i32 %a) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[ENDIF:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[ENDIF]]
+; CHECK: endif:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ br i1 %C, label %endif, label %else
+
+else:
+ br label %endif
+
+endif:
+ %b.0 = phi i32 [ 0, %entry ], [ 1, %else ]
+ %tmp.4 = icmp slt i32 %b.0, 123
+ ret i1 %tmp.4
+}
+
+define i1 @test19(i1 %A, i1 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[C:%.*]] = xor i1 [[TMP1]], true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext i1 %A to i32
+ %b = zext i1 %B to i32
+ %C = icmp eq i32 %a, %b
+ ret i1 %C
+}
+
+define i32 @test20(i32 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[B:%.*]] = and i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = and i32 %A, 1
+ %C = icmp ne i32 %B, 0
+ %D = zext i1 %C to i32
+ ret i32 %D
+}
+
+define <2 x i32> @test20vec(<2 x i32> %A) {
+; CHECK-LABEL: @test20vec(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %B = and <2 x i32> %A, <i32 1, i32 1>
+ %C = icmp ne <2 x i32> %B, zeroinitializer
+ %D = zext <2 x i1> %C to <2 x i32>
+ ret <2 x i32> %D
+}
+
+define i32 @test21(i32 %a) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP_6:%.*]] = lshr i32 [[A:%.*]], 2
+; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and i32 [[TMP_6]], 1
+; CHECK-NEXT: ret i32 [[TMP_6_LOBIT]]
+;
+ %tmp.6 = and i32 %a, 4
+ %not.tmp.7 = icmp ne i32 %tmp.6, 0
+ %retval = zext i1 %not.tmp.7 to i32
+ ret i32 %retval
+}
+
+define <2 x i32> @test21vec(<2 x i32> %a) {
+; CHECK-LABEL: @test21vec(
+; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[TMP_6_LOBIT]]
+;
+ %tmp.6 = and <2 x i32> %a, <i32 4, i32 4>
+ %not.tmp.7 = icmp ne <2 x i32> %tmp.6, zeroinitializer
+ %retval = zext <2 x i1> %not.tmp.7 to <2 x i32>
+ ret <2 x i32> %retval
+}
+
+define i1 @test22(i32 %A, i32 %X) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: ret i1 true
+;
+ %B = and i32 %A, 100663295
+ %C = icmp ult i32 %B, 268435456
+ %Y = and i32 %X, 7
+ %Z = icmp sgt i32 %Y, -1
+ %R = or i1 %C, %Z
+ ret i1 %R
+}
+
+define i32 @test23(i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[TMP_1:%.*]] = and i32 [[A:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP_1]], 1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp.1 = and i32 %a, 1
+ %tmp.2 = icmp eq i32 %tmp.1, 0
+ %tmp.3 = zext i1 %tmp.2 to i32
+ ret i32 %tmp.3
+}
+
+define <2 x i32> @test23vec(<2 x i32> %a) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tmp.1 = and <2 x i32> %a, <i32 1, i32 1>
+ %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
+ %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
+ ret <2 x i32> %tmp.3
+}
+
+define i32 @test24(i32 %a) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 [[A:%.*]], 2
+; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and i32 [[TMP_1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP_1_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp1 = and i32 %a, 4
+ %tmp.1 = lshr i32 %tmp1, 2
+ %tmp.2 = icmp eq i32 %tmp.1, 0
+ %tmp.3 = zext i1 %tmp.2 to i32
+ ret i32 %tmp.3
+}
+
+define <2 x i32> @test24vec(<2 x i32> %a) {
+; CHECK-LABEL: @test24vec(
+; CHECK-NEXT: [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %tmp1 = and <2 x i32> %a, <i32 4, i32 4>
+ %tmp.1 = lshr <2 x i32> %tmp1, <i32 2, i32 2>
+ %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
+ %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
+ ret <2 x i32> %tmp.3
+}
+
+define i1 @test25(i32 %A) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i32 %A, 2
+ %C = icmp ugt i32 %B, 2
+ ret i1 %C
+}
+
diff --git a/llvm/test/Transforms/InstCombine/setcc-strength-reduce.ll b/llvm/test/Transforms/InstCombine/setcc-strength-reduce.ll
new file mode 100644
index 00000000000..138712e5a9b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/setcc-strength-reduce.ll
@@ -0,0 +1,37 @@
+; This test ensures that "strength reduction" of conditional expressions is
+; working. Basically this boils down to converting setlt,gt,le,ge instructions
+; into equivalent setne,eq instructions.
+;
+; RUN: opt < %s -instcombine -S | \
+; RUN: grep -v "icmp eq" | grep -v "icmp ne" | not grep icmp
+; END.
+
+define i1 @test1(i32 %A) {
+ ; setne %A, 0
+ %B = icmp uge i32 %A, 1 ; <i1> [#uses=1]
+ ret i1 %B
+}
+
+define i1 @test2(i32 %A) {
+ ; setne %A, 0
+ %B = icmp ugt i32 %A, 0 ; <i1> [#uses=1]
+ ret i1 %B
+}
+
+define i1 @test3(i8 %A) {
+ ; setne %A, -128
+ %B = icmp sge i8 %A, -127 ; <i1> [#uses=1]
+ ret i1 %B
+}
+
+define i1 @test4(i8 %A) {
+ ; setne %A, 127
+ %B = icmp sle i8 %A, 126 ; <i1> [#uses=1]
+ ret i1 %B
+}
+
+define i1 @test5(i8 %A) {
+ ; setne %A, 127
+ %B = icmp slt i8 %A, 127 ; <i1> [#uses=1]
+ ret i1 %B
+}
diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll
new file mode 100644
index 00000000000..faf33719927
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sext.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+declare i32 @llvm.ctpop.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i64 @test1(i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.ctpop.i32(i32 %x)
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = call i32 @llvm.ctpop.i32(i32 %x)
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test4(i32 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[T:%.*]] = udiv i32 %x, 3
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = udiv i32 %x, 3
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test5(i32 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[T:%.*]] = urem i32 %x, 30000
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = urem i32 %x, 30000
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test6(i32 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[U:%.*]] = lshr i32 %x, 3
+; CHECK-NEXT: [[T:%.*]] = mul nuw nsw i32 [[U]], 3
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[T]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %u = lshr i32 %x, 3
+ %t = mul i32 %u, 3
+ %s = sext i32 %t to i64
+ ret i64 %s
+}
+
+define i64 @test7(i32 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[T:%.*]] = and i32 %x, 511
+; CHECK-NEXT: [[U:%.*]] = sub nuw nsw i32 20000, [[T]]
+; CHECK-NEXT: [[S1:%.*]] = zext i32 [[U]] to i64
+; CHECK-NEXT: ret i64 [[S1]]
+;
+ %t = and i32 %x, 511
+ %u = sub i32 20000, %t
+ %s = sext i32 %u to i64
+ ret i64 %s
+}
+
+define i32 @test8(i8 %a, i32 %f, i1 %p, i32* %z) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[D:%.*]] = lshr i32 %f, 24
+; CHECK-NEXT: [[N:%.*]] = select i1 %p, i32 [[D]], i32 0
+; CHECK-NEXT: ret i32 [[N]]
+;
+ %d = lshr i32 %f, 24
+ %e = select i1 %p, i32 %d, i32 0
+ %s = trunc i32 %e to i16
+ %n = sext i16 %s to i32
+ ret i32 %n
+}
+
+; rdar://6013816
+define i16 @test9(i16 %t, i1 %cond) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %T, label %F
+; CHECK: T:
+; CHECK-NEXT: br label %F
+; CHECK: F:
+; CHECK-NEXT: [[V_OFF0:%.*]] = phi i16 [ %t, %T ], [ 42, %entry ]
+; CHECK-NEXT: ret i16 [[V_OFF0]]
+;
+entry:
+ br i1 %cond, label %T, label %F
+T:
+ %t2 = sext i16 %t to i32
+ br label %F
+
+F:
+ %V = phi i32 [%t2, %T], [42, %entry]
+ %W = trunc i32 %V to i16
+ ret i16 %W
+}
+
+; PR2638
+define i32 @test10(i32 %i) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[B1:%.*]] = shl i32 %i, 30
+; CHECK-NEXT: [[B:%.*]] = ashr exact i32 [[B1]], 30
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %tmp12 = trunc i32 %i to i8
+ %tmp16 = shl i8 %tmp12, 6
+ %a = ashr i8 %tmp16, 6
+ %b = sext i8 %a to i32
+ ret i32 %b
+}
+
+define void @test11(<2 x i16> %srcA, <2 x i16> %srcB, <2 x i16>* %dst) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i16> %srcB, %srcA
+; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i16>
+; CHECK-NEXT: store <2 x i16> [[SEXT]], <2 x i16>* %dst, align 4
+; CHECK-NEXT: ret void
+;
+ %cmp = icmp eq <2 x i16> %srcB, %srcA
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ %tmask = ashr <2 x i16> %sext, <i16 15, i16 15>
+ store <2 x i16> %tmask, <2 x i16>* %dst
+ ret void
+}
+
+define i64 @test12(i32 %x) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[SHR]]
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64
+; CHECK-NEXT: ret i64 [[CONV]]
+;
+ %shr = lshr i32 %x, 1
+ %sub = sub nsw i32 0, %shr
+ %conv = sext i32 %sub to i64
+ ret i64 %conv
+}
+
+define i32 @test13(i32 %x) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 3
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 1
+; CHECK-NEXT: [[SEXT:%.*]] = add nsw i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
+ %and = and i32 %x, 8
+ %cmp = icmp eq i32 %and, 0
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @test14(i16 %x) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[AND:%.*]] = lshr i16 %x, 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[AND]], 1
+; CHECK-NEXT: [[SEXT:%.*]] = add nsw i16 [[TMP1]], -1
+; CHECK-NEXT: [[EXT:%.*]] = sext i16 [[SEXT]] to i32
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %and = and i16 %x, 16
+ %cmp = icmp ne i16 %and, 16
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @test15(i32 %x) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 27
+; CHECK-NEXT: [[SEXT:%.*]] = ashr i32 [[TMP1]], 31
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
+ %and = and i32 %x, 16
+ %cmp = icmp ne i32 %and, 0
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @test16(i16 %x) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i16 %x, 12
+; CHECK-NEXT: [[SEXT:%.*]] = ashr i16 [[TMP1]], 15
+; CHECK-NEXT: [[EXT:%.*]] = sext i16 [[SEXT]] to i32
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %and = and i16 %x, 8
+ %cmp = icmp eq i16 %and, 8
+ %ext = sext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @test17(i1 %x) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[C2:%.*]] = zext i1 %x to i32
+; CHECK-NEXT: ret i32 [[C2]]
+;
+ %c1 = sext i1 %x to i32
+ %c2 = sub i32 0, %c1
+ ret i32 %c2
+}
+
+define i32 @test18(i16 %x) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 %x, 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i16 %x, i16 0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %cmp = icmp slt i16 %x, 0
+ %sel = select i1 %cmp, i16 0, i16 %x
+ %ext = sext i16 %sel to i32
+ ret i32 %ext
+}
+
diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll
new file mode 100644
index 00000000000..497159f19b6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-add.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that these instructions are properly eliminated.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @shl_C1_add_A_C2_i32(i16 %A) {
+; CHECK-LABEL: @shl_C1_add_A_C2_i32(
+; CHECK-NEXT: [[B:%.*]] = zext i16 %A to i32
+; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = zext i16 %A to i32
+ %C = add i32 %B, 5
+ %D = shl i32 6, %C
+ ret i32 %D
+}
+
+define i32 @ashr_C1_add_A_C2_i32(i32 %A) {
+; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
+; CHECK-NEXT: ret i32 0
+;
+ %B = and i32 %A, 65535
+ %C = add i32 %B, 5
+ %D = ashr i32 6, %C
+ ret i32 %D
+}
+
+define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
+; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, 65535
+; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = and i32 %A, 65535
+ %C = add i32 %B, 5
+ %D = shl i32 6, %C
+ ret i32 %D
+}
+
+define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
+; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> %A to <4 x i32>
+; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = zext <4 x i16> %A to <4 x i32>
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
+ %D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
+; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
+ %D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
+; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16>
+ %D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C
+ ret <4 x i32> %D
+}
diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
new file mode 100644
index 00000000000..6aa262fd931
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These would crash if we didn't check for a negative shift.
+
+; https://llvm.org/bugs/show_bug.cgi?id=12967
+
+define void @pr12967() {
+; CHECK-LABEL: @pr12967(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: br label %loop
+;
+entry:
+ br label %loop
+
+loop:
+ %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
+ %shr = shl i32 %c, 7
+ %shl = lshr i32 %shr, -2
+ br label %loop
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=26760
+
+define void @pr26760() {
+; CHECK-LABEL: @pr26760(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: br label %loop
+;
+entry:
+ br label %loop
+
+loop:
+ %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
+ %shr = lshr i32 %c, 7
+ %shl = shl i32 %shr, -2
+ br label %loop
+}
+
+; Converting the 2 shifts to SHL 6 without the AND is wrong.
+; https://llvm.org/bugs/show_bug.cgi?id=8547
+
+define i32 @pr8547(i32* %g) {
+; CHECK-LABEL: @pr8547(
+; CHECK-NEXT: codeRepl:
+; CHECK-NEXT: br label %for.cond
+; CHECK: for.cond:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
+; CHECK-NEXT: store i32 [[STOREMERGE]], i32* %g, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i32 [[STOREMERGE]], 6
+; CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 64
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[CONV2]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %for.cond, label %codeRepl2
+; CHECK: codeRepl2:
+; CHECK-NEXT: ret i32 [[CONV2]]
+;
+codeRepl:
+ br label %for.cond
+
+for.cond:
+ %storemerge = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
+ store i32 %storemerge, i32* %g, align 4
+ %shl = shl i32 %storemerge, 30
+ %conv2 = lshr i32 %shl, 24
+ %tobool = icmp eq i32 %conv2, 0
+ br i1 %tobool, label %for.cond, label %codeRepl2
+
+codeRepl2:
+ ret i32 %conv2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/shift-sra.ll b/llvm/test/Transforms/InstCombine/shift-sra.ll
new file mode 100644
index 00000000000..4c28e878bbe
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-sra.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+
+define i32 @test1(i32 %X, i8 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[SHIFT_UPGRD_1:%.*]] = zext i8 %A to i32
+; CHECK-NEXT: [[Y1:%.*]] = lshr i32 %X, [[SHIFT_UPGRD_1]]
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[Y1]], 1
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %shift.upgrd.1 = zext i8 %A to i32
+ ; The ashr can be converted to a logical shift (lshr).
+ %Y = ashr i32 %X, %shift.upgrd.1
+ %Z = and i32 %Y, 1
+ ret i32 %Z
+}
+
+define i32 @test2(i8 %tmp) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 %tmp to i32
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP4]], 3
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp3 = zext i8 %tmp to i32
+ %tmp4 = add i32 %tmp3, 7
+ %tmp5 = ashr i32 %tmp4, 3
+ ret i32 %tmp5
+}
+
+define i64 @test3(i1 %X, i64 %Y, i1 %Cond) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: br i1 %Cond, label %T, label %F
+; CHECK: T:
+; CHECK-NEXT: [[X2:%.*]] = sext i1 %X to i64
+; CHECK-NEXT: br label %C
+; CHECK: F:
+; CHECK-NEXT: [[Y2:%.*]] = ashr i64 %Y, 63
+; CHECK-NEXT: br label %C
+; CHECK: C:
+; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[X2]], %T ], [ [[Y2]], %F ]
+; CHECK-NEXT: ret i64 [[P]]
+;
+ br i1 %Cond, label %T, label %F
+T:
+ %X2 = sext i1 %X to i64
+ br label %C
+F:
+ %Y2 = ashr i64 %Y, 63
+ br label %C
+C:
+ %P = phi i64 [%X2, %T], [%Y2, %F]
+ %S = ashr i64 %P, 12
+ ret i64 %S
+}
+
+define i64 @test4(i1 %X, i64 %Y, i1 %Cond) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: br i1 %Cond, label %T, label %F
+; CHECK: T:
+; CHECK-NEXT: [[X2:%.*]] = sext i1 %X to i64
+; CHECK-NEXT: br label %C
+; CHECK: F:
+; CHECK-NEXT: [[Y2:%.*]] = ashr i64 %Y, 63
+; CHECK-NEXT: br label %C
+; CHECK: C:
+; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[X2]], %T ], [ [[Y2]], %F ]
+; CHECK-NEXT: ret i64 [[P]]
+;
+ br i1 %Cond, label %T, label %F
+T:
+ %X2 = sext i1 %X to i64
+ br label %C
+F:
+ %Y2 = ashr i64 %Y, 63
+ br label %C
+C:
+ %P = phi i64 [%X2, %T], [%Y2, %F]
+ %R = shl i64 %P, 12
+ %S = ashr i64 %R, 12
+ ret i64 %S
+}
+
+; rdar://7732987
+define i32 @test5(i32 %Y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: br i1 undef, label %A, label %C
+; CHECK: A:
+; CHECK-NEXT: br i1 undef, label %B, label %D
+; CHECK: B:
+; CHECK-NEXT: br label %D
+; CHECK: C:
+; CHECK-NEXT: br i1 undef, label %D, label %E
+; CHECK: D:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, %A ], [ 0, %B ], [ %Y, %C ]
+; CHECK-NEXT: [[S:%.*]] = ashr i32 [[P]], 16
+; CHECK-NEXT: ret i32 [[S]]
+; CHECK: E:
+; CHECK-NEXT: ret i32 0
+;
+ br i1 undef, label %A, label %C
+A:
+ br i1 undef, label %B, label %D
+B:
+ br label %D
+C:
+ br i1 undef, label %D, label %E
+D:
+ %P = phi i32 [0, %A], [0, %B], [%Y, %C]
+ %S = ashr i32 %P, 16
+ ret i32 %S
+E:
+ ret i32 0
+}
+
+; (X >>s C1) >>s C2 --> X >>s (C1 + C2)
+
+define i32 @ashr_ashr(i32 %x) {
+; CHECK-LABEL: @ashr_ashr(
+; CHECK-NEXT: [[SH2:%.*]] = ashr i32 %x, 12
+; CHECK-NEXT: ret i32 [[SH2]]
+;
+ %sh1 = ashr i32 %x, 5
+ %sh2 = ashr i32 %sh1, 7
+ ret i32 %sh2
+}
+
+; PR3851
+; (X >>s C1) >>s C2 --> X >>s (Bitwidth - 1) when C1 + C2 >= Bitwidth
+
+define i32 @ashr_overshift(i32 %x) {
+; CHECK-LABEL: @ashr_overshift(
+; CHECK-NEXT: [[SH2:%.*]] = ashr i32 %x, 31
+; CHECK-NEXT: ret i32 [[SH2]]
+;
+ %sh1 = ashr i32 %x, 15
+ %sh2 = ashr i32 %sh1, 17
+ ret i32 %sh2
+}
+
+; (X >>s C1) >>s C2 --> X >>s (C1 + C2)
+
+define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_ashr_splat_vec(
+; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> %x, <i32 12, i32 12>
+; CHECK-NEXT: ret <2 x i32> [[SH2]]
+;
+ %sh1 = ashr <2 x i32> %x, <i32 5, i32 5>
+ %sh2 = ashr <2 x i32> %sh1, <i32 7, i32 7>
+ ret <2 x i32> %sh2
+}
+
+; (X >>s C1) >>s C2 --> X >>s (Bitwidth - 1) when C1 + C2 >= Bitwidth
+
+define <2 x i32> @ashr_overshift_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_overshift_splat_vec(
+; CHECK-NEXT: [[SH2:%.*]] = ashr <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[SH2]]
+;
+ %sh1 = ashr <2 x i32> %x, <i32 15, i32 15>
+ %sh2 = ashr <2 x i32> %sh1, <i32 17, i32 17>
+ ret <2 x i32> %sh2
+}
+
+; ashr (sext X), C --> sext (ashr X, C')
+
+define i32 @hoist_ashr_ahead_of_sext_1(i8 %x) {
+; CHECK-LABEL: @hoist_ashr_ahead_of_sext_1(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 3
+; CHECK-NEXT: [[R:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %sext = sext i8 %x to i32
+ %r = ashr i32 %sext, 3
+ ret i32 %r
+}
+
+; ashr (sext X), C --> sext (ashr X, C')
+
+define <2 x i32> @hoist_ashr_ahead_of_sext_1_splat(<2 x i8> %x) {
+; CHECK-LABEL: @hoist_ashr_ahead_of_sext_1_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> %x, <i8 3, i8 3>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %sext = sext <2 x i8> %x to <2 x i32>
+ %r = ashr <2 x i32> %sext, <i32 3, i32 3>
+ ret <2 x i32> %r
+}
+
+; ashr (sext X), C --> sext (ashr X, C') -- the shift amount must be clamped
+
+define i32 @hoist_ashr_ahead_of_sext_2(i8 %x) {
+; CHECK-LABEL: @hoist_ashr_ahead_of_sext_2(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 %x, 7
+; CHECK-NEXT: [[R:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %sext = sext i8 %x to i32
+ %r = ashr i32 %sext, 8
+ ret i32 %r
+}
+
+; ashr (sext X), C --> sext (ashr X, C') -- the shift amount must be clamped
+
+define <2 x i32> @hoist_ashr_ahead_of_sext_2_splat(<2 x i8> %x) {
+; CHECK-LABEL: @hoist_ashr_ahead_of_sext_2_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %sext = sext <2 x i8> %x to <2 x i32>
+ %r = ashr <2 x i32> %sext, <i32 8, i32 8>
+ ret <2 x i32> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
new file mode 100644
index 00000000000..9ac32439b6f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -0,0 +1,1539 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x i32> @lshr_non_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @lshr_non_splat_vector(
+; CHECK-NEXT: [[B:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 32, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
+ %B = lshr <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+ ret <4 x i32> %B
+}
+
+define <4 x i32> @shl_non_splat_vector(<4 x i32> %A) {
+; CHECK-LABEL: @shl_non_splat_vector(
+; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[A:%.*]], <i32 32, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
+ %B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+ ret <4 x i32> %B
+}
+
+define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 %A, 1 ;; convert to a mul instruction
+ %C = mul i32 %B, 3
+ ret i32 %C
+}
+
+define i32 @test6a(i32 %A) {
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 %A, 3
+ %C = shl i32 %B, 1 ;; convert to a mul instruction
+ ret i32 %C
+}
+
+;; (A << 5) << 3 === A << 8 == 0
+define i8 @test8(i8 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i8 0
+;
+ %B = shl i8 %A, 5
+ %C = shl i8 %B, 3
+ ret i8 %C
+}
+
+;; (A << 7) >> 7 === A & 1
+define i8 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, 1
+; CHECK-NEXT: ret i8 [[B]]
+;
+ %B = shl i8 %A, 7
+ %C = lshr i8 %B, 7
+ ret i8 %C
+}
+
+;; (A >> 7) << 7 === A & 128
+
+define i8 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, -128
+; CHECK-NEXT: ret i8 [[B]]
+;
+ %B = lshr i8 %A, 7
+ %C = shl i8 %B, 7
+ ret i8 %C
+}
+
+;; Allow the simplification when the lshr shift is exact.
+define i8 @test10a(i8 %A) {
+; CHECK-LABEL: @test10a(
+; CHECK-NEXT: ret i8 %A
+;
+ %B = lshr exact i8 %A, 7
+ %C = shl i8 %B, 7
+ ret i8 %C
+}
+
+;; This transformation is deferred to DAGCombine:
+;; (A >> 3) << 4 === (A & -8) << 1
+;; The shl may be valuable to scalar evolution.
+define i8 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3
+; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 3
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[B]], 4
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = lshr i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
+}
+
+;; Allow the simplification in InstCombine when the lshr shift is exact.
+define i8 @test11a(i8 %A) {
+; CHECK-LABEL: @test11a(
+; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = lshr exact i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
+}
+
+;; This is deferred to DAGCombine unless %B is single-use.
+;; (A >> 8) << 8 === A & -256
+define i32 @test12(i32 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[B1:%.*]] = and i32 %A, -256
+; CHECK-NEXT: ret i32 [[B1]]
+;
+ %B = ashr i32 %A, 8
+ %C = shl i32 %B, 8
+ ret i32 %C
+}
+
+;; (A >>s 6) << 6 === A & -64
+define i8 @shishi(i8 %x) {
+; CHECK-LABEL: @shishi(
+; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 6
+; CHECK-NEXT: [[B:%.*]] = and i8 [[X]], -64
+; CHECK-NEXT: [[EXTRA_USE_OF_A:%.*]] = mul nsw i8 [[A]], 5
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[EXTRA_USE_OF_A]], [[B]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %a = ashr i8 %x, 6
+ %b = shl i8 %a, 6
+ %extra_use_of_a = mul i8 %a, 5
+ %r = sdiv i8 %extra_use_of_a, %b
+ ret i8 %r
+}
+
+;; This transformation is deferred to DAGCombine:
+;; (A >> 3) << 4 === (A & -8) * 2
+;; The shl may be valuable to scalar evolution.
+define i8 @test13(i8 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3
+; CHECK-NEXT: [[B1:%.*]] = lshr i8 [[A]], 3
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[B1]], 4
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = ashr i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
+}
+
+define i8 @test13a(i8 %A) {
+; CHECK-LABEL: @test13a(
+; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = ashr exact i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
+}
+
+;; D = ((B | 1234) << 4) === (B << 4) | (1234 << 4)
+define i32 @test14(i32 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, -19760
+; CHECK-NEXT: [[C:%.*]] = or i32 [[B]], 19744
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = lshr i32 %A, 4
+ %C = or i32 %B, 1234
+ %D = shl i32 %C, 4
+ ret i32 %D
+}
+
+;; D = ((B & 1234) >> 4) === (B >> 4) & (1234 >> 4)
+define i32 @test14a(i32 %A) {
+; CHECK-LABEL: @test14a(
+; CHECK-NEXT: [[C:%.*]] = and i32 %A, 77
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 %A, 4
+ %C = and i32 %B, 1234
+ %D = lshr i32 %C, 4
+ ret i32 %D
+}
+
+define i32 @test15(i1 %C) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[A:%.*]] = select i1 %C, i32 12, i32 4
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %A = select i1 %C, i32 3, i32 1
+ %V = shl i32 %A, 2
+ ret i32 %V
+}
+
+define i32 @test15a(i1 %C) {
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT: [[V:%.*]] = select i1 %C, i32 512, i32 128
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i8 3, i8 1
+ %shift.upgrd.4 = zext i8 %A to i32
+ %V = shl i32 64, %shift.upgrd.4
+ ret i32 %V
+}
+
+define i1 @test16(i32 %X) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[TMP_6:%.*]] = and i32 %X, 16
+; CHECK-NEXT: [[TMP_7:%.*]] = icmp ne i32 [[TMP_6]], 0
+; CHECK-NEXT: ret i1 [[TMP_7]]
+;
+ %tmp.3 = ashr i32 %X, 4
+ %tmp.6 = and i32 %tmp.3, 1
+ %tmp.7 = icmp ne i32 %tmp.6, 0
+ ret i1 %tmp.7
+}
+
+define i1 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i32 %A, -8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B_MASK]], 9872
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = lshr i32 %A, 3
+ %C = icmp eq i32 %B, 1234
+ ret i1 %C
+}
+
+define <2 x i1> @test17vec(<2 x i32> %A) {
+; CHECK-LABEL: @test17vec(
+; CHECK-NEXT: [[B_MASK:%.*]] = and <2 x i32> %A, <i32 -8, i32 -8>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], <i32 9872, i32 9872>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = lshr <2 x i32> %A, <i32 3, i32 3>
+ %C = icmp eq <2 x i32> %B, <i32 1234, i32 1234>
+ ret <2 x i1> %C
+}
+
+define i1 @test18(i8 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i1 false
+;
+ %B = lshr i8 %A, 7
+ ;; false
+ %C = icmp eq i8 %B, 123
+ ret i1 %C
+}
+
+define i1 @test19(i32 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 %A, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i32 %A, 2
+ ;; (X & -4) == 0
+ %C = icmp eq i32 %B, 0
+ ret i1 %C
+}
+
+define <2 x i1> @test19vec(<2 x i32> %A) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> %A, <i32 4, i32 4>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = ashr <2 x i32> %A, <i32 2, i32 2>
+ %C = icmp eq <2 x i32> %B, zeroinitializer
+ ret <2 x i1> %C
+}
+
+;; X >u ~4
+define i1 @test19a(i32 %A) {
+; CHECK-LABEL: @test19a(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %A, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i32 %A, 2
+ %C = icmp eq i32 %B, -1
+ ret i1 %C
+}
+
+define <2 x i1> @test19a_vec(<2 x i32> %A) {
+; CHECK-LABEL: @test19a_vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> %A, <i32 -5, i32 -5>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = ashr <2 x i32> %A, <i32 2, i32 2>
+ %C = icmp eq <2 x i32> %B, <i32 -1, i32 -1>
+ ret <2 x i1> %C
+}
+
+define i1 @test20(i8 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i1 false
+;
+ %B = ashr i8 %A, 7
+ ;; false
+ %C = icmp eq i8 %B, 123
+ ret i1 %C
+}
+
+define i1 @test21(i8 %A) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i8 %A, 15
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i8 %A, 4
+ %C = icmp eq i8 %B, -128
+ ret i1 %C
+}
+
+define i1 @test22(i8 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[B_MASK:%.*]] = and i8 %A, 15
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i8 %A, 4
+ %C = icmp eq i8 %B, 0
+ ret i1 %C
+}
+
+define i8 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[D:%.*]] = trunc i32 %A to i8
+; CHECK-NEXT: ret i8 [[D]]
+;
+ ;; casts not needed
+ %B = shl i32 %A, 24
+ %C = ashr i32 %B, 24
+ %D = trunc i32 %C to i8
+ ret i8 %D
+}
+
+define i8 @test24(i8 %X) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[Z:%.*]] = and i8 %X, 3
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %Y = and i8 %X, -5
+ %Z = shl i8 %Y, 5
+ %Q = ashr i8 %Z, 5
+ ret i8 %Q
+}
+
+define i32 @test25(i32 %tmp.2, i32 %AA) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[TMP_3:%.*]] = and i32 %tmp.2, -131072
+; CHECK-NEXT: [[X2:%.*]] = add i32 [[TMP_3]], %AA
+; CHECK-NEXT: [[TMP_6:%.*]] = and i32 [[X2]], -131072
+; CHECK-NEXT: ret i32 [[TMP_6]]
+;
+ %x = lshr i32 %AA, 17
+ %tmp.3 = lshr i32 %tmp.2, 17
+ %tmp.5 = add i32 %tmp.3, %x
+ %tmp.6 = shl i32 %tmp.5, 17
+ ret i32 %tmp.6
+}
+
+define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) {
+; CHECK-LABEL: @test25_vector(
+; CHECK-NEXT: [[TMP_3:%.*]] = and <2 x i32> %tmp.2, <i32 -131072, i32 -131072>
+; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[TMP_3]], %AA
+; CHECK-NEXT: [[TMP_6:%.*]] = and <2 x i32> [[X2]], <i32 -131072, i32 -131072>
+; CHECK-NEXT: ret <2 x i32> [[TMP_6]]
+;
+ %x = lshr <2 x i32> %AA, <i32 17, i32 17>
+ %tmp.3 = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
+ %tmp.5 = add <2 x i32> %tmp.3, %x
+ %tmp.6 = shl <2 x i32> %tmp.5, <i32 17, i32 17>
+ ret <2 x i32> %tmp.6
+}
+
+;; handle casts between shifts.
+define i32 @test26(i32 %A) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, -2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = lshr i32 %A, 1
+ %C = bitcast i32 %B to i32
+ %D = shl i32 %C, 1
+ ret i32 %D
+}
+
+
+define i1 @test27(i32 %x) nounwind {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 8
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %y = lshr i32 %x, 3
+ %z = trunc i32 %y to i1
+ ret i1 %z
+}
+
+define i1 @test28(i8 %x) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shr = lshr i8 %x, 7
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @test28vec(<2 x i8> %x) {
+; CHECK-LABEL: @test28vec(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %shr = lshr <2 x i8> %x, <i8 7, i8 7>
+ %cmp = icmp ne <2 x i8> %shr, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
+define i8 @test28a(i8 %x, i8 %y) {
+; CHECK-LABEL: @test28a(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 %x, 7
+; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[COND1]], label %bb2, label %bb1
+; CHECK: bb1:
+; CHECK-NEXT: ret i8 [[TMP1]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], %y
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+entry:
+; This shouldn't be transformed.
+ %tmp1 = lshr i8 %x, 7
+ %cond1 = icmp ne i8 %tmp1, 0
+ br i1 %cond1, label %bb1, label %bb2
+bb1:
+ ret i8 %tmp1
+bb2:
+ %tmp2 = add i8 %tmp1, %y
+ ret i8 %tmp2
+}
+
+
+define i32 @test29(i64 %d18) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP916:%.*]] = lshr i64 %d18, 63
+; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP916]] to i32
+; CHECK-NEXT: ret i32 [[TMP10]]
+;
+entry:
+ %tmp916 = lshr i64 %d18, 32
+ %tmp917 = trunc i64 %tmp916 to i32
+ %tmp10 = lshr i32 %tmp917, 31
+ ret i32 %tmp10
+}
+
+
+define i32 @test30(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[X1:%.*]] = and i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = shl i32 %A, %C
+ %Y = shl i32 %B, %C
+ %Z = and i32 %X, %Y
+ ret i32 %Z
+}
+
+define i32 @test31(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[X1:%.*]] = or i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = lshr i32 %A, %C
+ %Y = lshr i32 %B, %C
+ %Z = or i32 %X, %Y
+ ret i32 %Z
+}
+
+define i32 @test32(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[X1:%.*]] = xor i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = ashr i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = ashr i32 %A, %C
+ %Y = ashr i32 %B, %C
+ %Z = xor i32 %X, %Y
+ ret i32 %Z
+}
+
+define i1 @test33(i32 %X) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[TMP1_MASK:%.*]] = and i32 %X, 16777216
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1_MASK]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = shl i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+}
+
+define <2 x i1> @test33vec(<2 x i32> %X) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT: [[TMP1_MASK:%.*]] = and <2 x i32> %X, <i32 16777216, i32 16777216>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1_MASK]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp1 = shl <2 x i32> %X, <i32 7, i32 7>
+ %tmp2 = icmp slt <2 x i32> %tmp1, zeroinitializer
+ ret <2 x i1> %tmp2
+}
+
+define i1 @test34(i32 %X) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = lshr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+}
+
+define i1 @test35(i32 %X) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 %X, 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = ashr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+}
+
+define <2 x i1> @test35vec(<2 x i32> %X) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> %X, zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %tmp1 = ashr <2 x i32> %X, <i32 7, i32 7>
+ %tmp2 = icmp slt <2 x i32> %tmp1, zeroinitializer
+ ret <2 x i1> %tmp2
+}
+
+define i128 @test36(i128 %A, i128 %B) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[TMP231:%.*]] = or i128 %B, %A
+; CHECK-NEXT: [[INS:%.*]] = and i128 [[TMP231]], 18446744073709551615
+; CHECK-NEXT: ret i128 [[INS]]
+;
+ %tmp27 = shl i128 %A, 64
+ %tmp23 = shl i128 %B, 64
+ %ins = or i128 %tmp23, %tmp27
+ %tmp45 = lshr i128 %ins, 64
+ ret i128 %tmp45
+}
+
+define i64 @test37(i128 %A, i32 %B) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 %B to i128
+; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i128 [[TMP22]], 32
+; CHECK-NEXT: [[INS:%.*]] = or i128 [[TMP23]], %A
+; CHECK-NEXT: [[TMP46:%.*]] = trunc i128 [[INS]] to i64
+; CHECK-NEXT: ret i64 [[TMP46]]
+;
+ %tmp27 = shl i128 %A, 64
+ %tmp22 = zext i32 %B to i128
+ %tmp23 = shl i128 %tmp22, 96
+ %ins = or i128 %tmp23, %tmp27
+ %tmp45 = lshr i128 %ins, 64
+ %tmp46 = trunc i128 %tmp45 to i64
+ ret i64 %tmp46
+}
+
+define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_splat_vec(
+; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> %x to <2 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], <i32 17, i32 17>
+; CHECK-NEXT: ret <2 x i32> [[T3]]
+;
+ %t2 = zext <2 x i8> %x to <2 x i32>
+ %t3 = shl <2 x i32> %t2, <i32 17, i32 17>
+ ret <2 x i32> %t3
+}
+
+define i32 @test38(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[REM1:%.*]] = and i32 %x, 31
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[REM1]]
+; CHECK-NEXT: ret i32 [[SHL]]
+;
+ %rem = srem i32 %x, 32
+ %shl = shl i32 1, %rem
+ ret i32 %shl
+}
+
+; <rdar://problem/8756731>
+define i8 @test39(i32 %a0) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 %a0 to i8
+; CHECK-NEXT: [[TMP5:%.*]] = shl i8 [[TMP4]], 5
+; CHECK-NEXT: [[TMP49:%.*]] = shl i8 [[TMP4]], 6
+; CHECK-NEXT: [[TMP50:%.*]] = and i8 [[TMP49]], 64
+; CHECK-NEXT: [[TMP51:%.*]] = xor i8 [[TMP50]], [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[TMP4]], 2
+; CHECK-NEXT: [[TMP54:%.*]] = and i8 [[TMP0]], 16
+; CHECK-NEXT: [[TMP551:%.*]] = or i8 [[TMP54]], [[TMP51]]
+; CHECK-NEXT: ret i8 [[TMP551]]
+;
+entry:
+ %tmp4 = trunc i32 %a0 to i8
+ %tmp5 = shl i8 %tmp4, 5
+ %tmp48 = and i8 %tmp5, 32
+ %tmp49 = lshr i8 %tmp48, 5
+ %tmp50 = mul i8 %tmp49, 64
+ %tmp51 = xor i8 %tmp50, %tmp5
+ %tmp52 = and i8 %tmp51, -128
+ %tmp53 = lshr i8 %tmp52, 7
+ %tmp54 = mul i8 %tmp53, 16
+ %tmp55 = xor i8 %tmp54, %tmp51
+ ret i8 %tmp55
+}
+
+; PR9809
+define i32 @test40(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 2
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 %a, [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %shl1 = shl i32 1, %b
+ %shl2 = shl i32 %shl1, 2
+ %div = udiv i32 %a, %shl2
+ ret i32 %div
+}
+
+define i32 @test41(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 8, %b
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = shl i32 1, %b
+ %2 = shl i32 %1, 3
+ ret i32 %2
+}
+
+define i32 @test42(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, %b
+; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 %a, [[DIV]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
+ %div = lshr i32 4096, %b ; must be exact otherwise we'd divide by zero
+ %div2 = udiv i32 %a, %div
+ ret i32 %div2
+}
+
+define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT: [[DIV:%.*]] = lshr exact <2 x i32> <i32 4096, i32 4096>, %b
+; CHECK-NEXT: [[DIV2:%.*]] = udiv <2 x i32> %a, [[DIV]]
+; CHECK-NEXT: ret <2 x i32> [[DIV2]]
+;
+ %div = lshr <2 x i32> <i32 4096, i32 4096>, %b ; must be exact otherwise we'd divide by zero
+ %div2 = udiv <2 x i32> %a, %div
+ ret <2 x i32> %div2
+}
+
+define i32 @test43(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 12
+; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 %a, [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
+ %div = shl i32 4096, %b ; must be exact otherwise we'd divide by zero
+ %div2 = udiv i32 %a, %div
+ ret i32 %div2
+}
+
+define i32 @test44(i32 %a) nounwind {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[Y:%.*]] = shl i32 %a, 5
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = shl nuw i32 %a, 1
+ %z = shl i32 %y, 4
+ ret i32 %z
+}
+
+define i32 @test45(i32 %a) nounwind {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[Y:%.*]] = lshr i32 %a, 5
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = lshr exact i32 %a, 1
+ %z = lshr i32 %y, 4
+ ret i32 %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define i32 @test46(i32 %a) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[Z:%.*]] = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %y = ashr exact i32 %a, 3
+ %z = shl i32 %y, 1
+ ret i32 %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define <2 x i32> @test46_splat_vec(<2 x i32> %a) {
+; CHECK-LABEL: @test46_splat_vec(
+; CHECK-NEXT: [[Z:%.*]] = ashr exact <2 x i32> %a, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[Z]]
+;
+ %y = ashr exact <2 x i32> %a, <i32 3, i32 3>
+ %z = shl <2 x i32> %y, <i32 1, i32 1>
+ ret <2 x i32> %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define i8 @test47(i8 %a) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[Z:%.*]] = lshr exact i8 %a, 2
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %y = lshr exact i8 %a, 3
+ %z = shl i8 %y, 1
+ ret i8 %z
+}
+
+; (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+
+define <2 x i8> @test47_splat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @test47_splat_vec(
+; CHECK-NEXT: [[Z:%.*]] = lshr exact <2 x i8> %a, <i8 2, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[Z]]
+;
+ %y = lshr exact <2 x i8> %a, <i8 3, i8 3>
+ %z = shl <2 x i8> %y, <i8 1, i8 1>
+ ret <2 x i8> %z
+}
+
+; (X >>u,exact C1) << C2 --> X << (C2-C1) when C2 > C1
+
+define i32 @test48(i32 %x) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = lshr exact i32 %x, 1
+ %B = shl i32 %A, 3
+ ret i32 %B
+}
+
+; Verify that wrap flags are preserved from the original 'shl'.
+
+define i32 @test48_nuw_nsw(i32 %x) {
+; CHECK-LABEL: @test48_nuw_nsw(
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = lshr exact i32 %x, 1
+ %B = shl nuw nsw i32 %A, 3
+ ret i32 %B
+}
+
+; (X >>u,exact C1) << C2 --> X << (C2-C1) when splatted C2 > C1
+
+define <2 x i32> @test48_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test48_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = lshr exact <2 x i32> %x, <i32 1, i32 1>
+ %B = shl nsw nuw <2 x i32> %A, <i32 3, i32 3>
+ ret <2 x i32> %B
+}
+
+; (X >>s,exact C1) << C2 --> X << (C2-C1) when C2 > C1
+
+define i32 @test49(i32 %x) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = ashr exact i32 %x, 1
+ %B = shl i32 %A, 3
+ ret i32 %B
+}
+
+; Verify that wrap flags are preserved from the original 'shl'.
+
+define i32 @test49_nuw_nsw(i32 %x) {
+; CHECK-LABEL: @test49_nuw_nsw(
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = ashr exact i32 %x, 1
+ %B = shl nuw nsw i32 %A, 3
+ ret i32 %B
+}
+
+; (X >>s,exact C1) << C2 --> X << (C2-C1) when splatted C2 > C1
+
+define <2 x i32> @test49_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test49_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = ashr exact <2 x i32> %x, <i32 1, i32 1>
+ %B = shl nsw nuw <2 x i32> %A, <i32 3, i32 3>
+ ret <2 x i32> %B
+}
+
+; (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+
+define i32 @test50(i32 %x) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = shl nsw i32 %x, 1
+ %B = ashr i32 %A, 3
+ ret i32 %B
+}
+
+; (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+; Also, check that exact is propagated.
+
+define <2 x i32> @test50_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test50_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = shl nsw <2 x i32> %x, <i32 1, i32 1>
+ %B = ashr exact <2 x i32> %A, <i32 3, i32 3>
+ ret <2 x i32> %B
+}
+
+; (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+
+define i32 @test51(i32 %x) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[B:%.*]] = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = shl nuw i32 %x, 1
+ %B = lshr i32 %A, 3
+ ret i32 %B
+}
+
+; (X <<nuw C1) >>u C2 --> X >>u (C2-C1) with splats
+; Also, check that exact is propagated.
+
+define <2 x i32> @test51_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test51_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = lshr exact <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = shl nuw <2 x i32> %x, <i32 1, i32 1>
+ %B = lshr exact <2 x i32> %A, <i32 3, i32 3>
+ ret <2 x i32> %B
+}
+
+; (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+; Also, check that exact is propagated.
+
+define i32 @test51_no_nuw(i32 %x) {
+; CHECK-LABEL: @test51_no_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 %x, 2
+; CHECK-NEXT: [[B:%.*]] = and i32 [[TMP1]], 536870911
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = shl i32 %x, 1
+ %B = lshr exact i32 %A, 3
+ ret i32 %B
+}
+
+; (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+
+define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test51_no_nuw_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[TMP1]], <i32 536870911, i32 536870911>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = shl <2 x i32> %x, <i32 1, i32 1>
+ %B = lshr <2 x i32> %A, <i32 3, i32 3>
+ ret <2 x i32> %B
+}
+
+; (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
+
+define i32 @test52(i32 %x) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[B:%.*]] = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = shl nsw i32 %x, 3
+ %B = ashr i32 %A, 1
+ ret i32 %B
+}
+
+; (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
+
+define <2 x i32> @test52_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test52_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = shl nsw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = shl nsw <2 x i32> %x, <i32 3, i32 3>
+ %B = ashr <2 x i32> %A, <i32 1, i32 1>
+ ret <2 x i32> %B
+}
+
+; (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+
+define i32 @test53(i32 %x) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[B:%.*]] = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = shl nuw i32 %x, 3
+ %B = lshr i32 %A, 1
+ ret i32 %B
+}
+
+; (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+
+define <2 x i32> @test53_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test53_splat_vec(
+; CHECK-NEXT: [[B:%.*]] = shl nuw <2 x i32> %x, <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %A = shl nuw <2 x i32> %x, <i32 3, i32 3>
+ %B = lshr <2 x i32> %A, <i32 1, i32 1>
+ ret <2 x i32> %B
+}
+
+; (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2)
+
+define i8 @test53_no_nuw(i8 %x) {
+; CHECK-LABEL: @test53_no_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i8 %x, 2
+; CHECK-NEXT: [[B:%.*]] = and i8 [[TMP1]], 124
+; CHECK-NEXT: ret i8 [[B]]
+;
+ %A = shl i8 %x, 3
+ %B = lshr i8 %A, 1
+ ret i8 %B
+}
+
+; (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2)
+
+define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @test53_no_nuw_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> %x, <i8 2, i8 2>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 124, i8 124>
+; CHECK-NEXT: ret <2 x i8> [[B]]
+;
+ %A = shl <2 x i8> %x, <i8 3, i8 3>
+ %B = lshr <2 x i8> %A, <i8 1, i8 1>
+ ret <2 x i8> %B
+}
+
+define i32 @test54(i32 %x) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP1]], 16
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %and = and i32 %shl, 16
+ ret i32 %and
+}
+
+define <2 x i32> @test54_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test54_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %x, <i32 3, i32 3>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: ret <2 x i32> [[AND]]
+;
+ %shr2 = lshr <2 x i32> %x, <i32 1, i32 1>
+ %shl = shl <2 x i32> %shr2, <i32 4, i32 4>
+ %and = and <2 x i32> %shl, <i32 16, i32 16>
+ ret <2 x i32> %and
+}
+
+define i32 @test55(i32 %x) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 8
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %or = or i32 %shl, 8
+ ret i32 %or
+}
+
+define i32 @test56(i32 %x) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[SHR2:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR2]], 4
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %or = or i32 %shl, 7
+ ret i32 %or
+}
+
+define i32 @test57(i32 %x) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR1]], 4
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr = ashr i32 %x, 1
+ %shl = shl i32 %shr, 4
+ %or = or i32 %shl, 7
+ ret i32 %or
+}
+
+define i32 @test58(i32 %x) {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr = ashr i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 1
+ ret i32 %or
+}
+
+define <2 x i32> @test58_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test58_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> %x, <i32 3, i32 3>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %shr = ashr <2 x i32> %x, <i32 4, i32 4>
+ %shl = shl <2 x i32> %shr, <i32 1, i32 1>
+ %or = or <2 x i32> %shl, <i32 1, i32 1>
+ ret <2 x i32> %or
+}
+
+define i32 @test59(i32 %x) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 %x, 4
+; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SHR]], 1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 2
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr = ashr i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 2
+ ret i32 %or
+}
+
+; propagate "exact" trait
+define i32 @test60(i32 %x) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[SHL:%.*]] = ashr exact i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shr = ashr exact i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 1
+ ret i32 %or
+}
+
+; PR17026
+define void @test61(i128 %arg) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 undef, label %bb1, label %bb12
+; CHECK: bb1:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br i1 undef, label %bb3, label %bb7
+; CHECK: bb3:
+; CHECK-NEXT: br label %bb8
+; CHECK: bb7:
+; CHECK-NEXT: br i1 undef, label %bb8, label %bb2
+; CHECK: bb8:
+; CHECK-NEXT: br i1 undef, label %bb11, label %bb12
+; CHECK: bb11:
+; CHECK-NEXT: br i1 undef, label %bb1, label %bb12
+; CHECK: bb12:
+; CHECK-NEXT: ret void
+;
+bb:
+ br i1 undef, label %bb1, label %bb12
+
+bb1: ; preds = %bb11, %bb
+ br label %bb2
+
+bb2: ; preds = %bb7, %bb1
+ br i1 undef, label %bb3, label %bb7
+
+bb3: ; preds = %bb2
+ %tmp = lshr i128 %arg, 36893488147419103232
+ %tmp4 = shl i128 %tmp, 0
+ %tmp5 = or i128 %tmp4, undef
+ %tmp6 = trunc i128 %tmp5 to i16
+ br label %bb8
+
+bb7: ; preds = %bb2
+ br i1 undef, label %bb8, label %bb2
+
+bb8: ; preds = %bb7, %bb3
+ %tmp9 = phi i16 [ %tmp6, %bb3 ], [ undef, %bb7 ]
+ %tmp10 = icmp eq i16 %tmp9, 0
+ br i1 %tmp10, label %bb11, label %bb12
+
+bb11: ; preds = %bb8
+ br i1 undef, label %bb1, label %bb12
+
+bb12: ; preds = %bb11, %bb8, %bb
+ ret void
+}
+
+define i32 @test62(i32 %a) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: ret i32 undef
+;
+ %b = ashr i32 %a, 32 ; shift all bits out
+ ret i32 %b
+}
+
+define <4 x i32> @test62_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_splat_vector(
+; CHECK-NEXT: ret <4 x i32> undef
+;
+ %b = ashr <4 x i32> %a, <i32 32, i32 32, i32 32, i32 32> ; shift all bits out
+ ret <4 x i32> %b
+}
+
+define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_non_splat_vector(
+; CHECK-NEXT: [[B:%.*]] = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
+ %b = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2> ; only the first element shifts all bits out
+ ret <4 x i32> %b
+}
+
+define <2 x i65> @test_63(<2 x i64> %t) {
+; CHECK-LABEL: @test_63(
+; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> %t to <2 x i65>
+; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], <i65 33, i65 33>
+; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i65> [[SEXT]], <i65 33, i65 33>
+; CHECK-NEXT: ret <2 x i65> [[B]]
+;
+ %a = zext <2 x i64> %t to <2 x i65>
+ %sext = shl <2 x i65> %a, <i65 33, i65 33>
+ %b = ashr <2 x i65> %sext, <i65 33, i65 33>
+ ret <2 x i65> %b
+}
+
+define i64 @test_64(i32 %t) {
+; CHECK-LABEL: @test_64(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %t, 8
+; CHECK-NEXT: [[SHL:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[SHL]]
+;
+ %and = and i32 %t, 16777215
+ %ext = zext i32 %and to i64
+ %shl = shl i64 %ext, 8
+ ret i64 %shl
+}
+
+define <2 x i64> @test_64_splat_vec(<2 x i32> %t) {
+; CHECK-LABEL: @test_64_splat_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %t, <i32 8, i32 8>
+; CHECK-NEXT: [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SHL]]
+;
+ %and = and <2 x i32> %t, <i32 16777215, i32 16777215>
+ %ext = zext <2 x i32> %and to <2 x i64>
+ %shl = shl <2 x i64> %ext, <i64 8, i64 8>
+ ret <2 x i64> %shl
+}
+
+define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) {
+; CHECK-LABEL: @ashr_demanded_bits_splat(
+; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i8> [[SHR]]
+;
+ %and = and <2 x i8> %x, <i8 128, i8 128>
+ %shr = ashr <2 x i8> %and, <i8 7, i8 7>
+ ret <2 x i8> %shr
+}
+
+define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) {
+; CHECK-LABEL: @lshr_demanded_bits_splat(
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, <i8 7, i8 7>
+; CHECK-NEXT: ret <2 x i8> [[SHR]]
+;
+ %and = and <2 x i8> %x, <i8 128, i8 128>
+ %shr = lshr <2 x i8> %and, <i8 7, i8 7>
+ ret <2 x i8> %shr
+}
+
+; Make sure known bits works correctly with non-power-of-2 bit widths.
+define i7 @test65(i7 %a, i7 %b) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret i7 0
+;
+ %shiftamt = and i7 %b, 6 ; this ensures the shift amount is even and less than the bit width.
+ %x = lshr i7 42, %shiftamt ; 42 has a zero in every even numbered bit and a one in every odd bit.
+ %y = and i7 %x, 1 ; this extracts the lsb which should be 0 because we shifted an even number of bits and all even bits of the shift input are 0.
+ ret i7 %y
+}
+
+define i32 @shl_select_add_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_add_true(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = add i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_add_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_add_false(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = add i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_and_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_and_true(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_and_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_and_false(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_and_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_and_true(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_and_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_and_false(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_and_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_and_true(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -1073741821
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 2147483655
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_and_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_and_false(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -1073741821
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = and i32 %x, 2147483655
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_or_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_or_true(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_or_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_or_false(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_or_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_or_true(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_or_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_or_false(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_or_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_or_true(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_or_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_or_false(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = or i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_xor_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_xor_true(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @shl_select_xor_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @shl_select_xor_false(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 14
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = shl i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_xor_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_xor_true(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @lshr_select_xor_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @lshr_select_xor_false(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = lshr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_xor_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_xor_true(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %1, i32 %x
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+define i32 @ashr_select_xor_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @ashr_select_xor_false(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = xor i32 %x, 7
+ %2 = select i1 %cond, i32 %x, i32 %1
+ %3 = ashr i32 %2, 1
+ ret i32 %3
+}
+
+; OSS Fuzz #4871
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=4871
+define i177 @lshr_out_of_range(i177 %Y, i177** %A2) {
+; CHECK-LABEL: @lshr_out_of_range(
+; CHECK-NEXT: store i177** [[A2:%.*]], i177*** undef, align 8
+; CHECK-NEXT: ret i177 0
+;
+ %B5 = udiv i177 %Y, -1
+ %B4 = add i177 %B5, -1
+ %B2 = add i177 %B4, -1
+ %B6 = mul i177 %B5, %B2
+ %B3 = add i177 %B2, %B2
+ %B10 = sub i177 %B5, %B3
+ %B12 = lshr i177 %Y, %B6
+ %C8 = icmp ugt i177 %B12, %B4
+ %G18 = getelementptr i177*, i177** %A2, i1 %C8
+ store i177** %G18, i177*** undef
+ %B1 = udiv i177 %B10, %B6
+ ret i177 %B1
+}
+
+; OSS Fuzz #5032
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5032
+define void @ashr_out_of_range(i177* %A) {
+; CHECK-LABEL: @ashr_out_of_range(
+; CHECK-NEXT: ret void
+;
+ %L = load i177, i177* %A
+ %B5 = udiv i177 %L, -1
+ %B4 = add i177 %B5, -1
+ %B2 = add i177 %B4, -1
+ %G11 = getelementptr i177, i177* %A, i177 %B2
+ %L7 = load i177, i177* %G11
+ %B6 = mul i177 %B5, %B2
+ %B24 = ashr i177 %L7, %B6
+ %B36 = and i177 %L7, %B4
+ %C17 = icmp sgt i177 %B36, %B24
+ %G62 = getelementptr i177, i177* %G11, i1 %C17
+ %B28 = urem i177 %B24, %B6
+ store i177 %B28, i177* %G62
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/should-change-type.ll b/llvm/test/Transforms/InstCombine/should-change-type.ll
new file mode 100644
index 00000000000..f825de1907f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/should-change-type.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "n64"
+
+; Tests for removing zext/trunc from/to i8, i16 and i32, even if they are
+; not legal types for the target.
+
+define i8 @test1(i8 %x, i8 %y) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[C:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %xz = zext i8 %x to i64
+ %yz = zext i8 %y to i64
+ %c = add i64 %xz, %yz
+ %d = trunc i64 %c to i8
+ ret i8 %d
+}
+
+define i16 @test2(i16 %x, i16 %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[C:%.*]] = add i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %xz = zext i16 %x to i64
+ %yz = zext i16 %y to i64
+ %c = add i64 %xz, %yz
+ %d = trunc i64 %c to i16
+ ret i16 %d
+}
+
+define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %xz = zext i32 %x to i64
+ %yz = zext i32 %y to i64
+ %c = add i64 %xz, %yz
+ %d = trunc i64 %c to i32
+ ret i32 %d
+}
+
+define i9 @test4(i9 %x, i9 %y) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[XZ:%.*]] = zext i9 [[X:%.*]] to i64
+; CHECK-NEXT: [[YZ:%.*]] = zext i9 [[Y:%.*]] to i64
+; CHECK-NEXT: [[C:%.*]] = add nuw nsw i64 [[XZ]], [[YZ]]
+; CHECK-NEXT: [[D:%.*]] = trunc i64 [[C]] to i9
+; CHECK-NEXT: ret i9 [[D]]
+;
+ %xz = zext i9 %x to i64
+ %yz = zext i9 %y to i64
+ %c = add i64 %xz, %yz
+ %d = trunc i64 %c to i9
+ ret i9 %d
+}
diff --git a/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll
new file mode 100644
index 00000000000..bf9609623d8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Narrow the select operands to eliminate the existing shuffles and replace a wide select with a narrow select.
+
+define <2 x i8> @narrow_shuffle_of_select(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
+; The 1st shuffle is not extending with undefs, but demanded-elements analysis corrects that.
+
+define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_overspecified_extend(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
+; Verify that undef elements are acceptable for identity shuffle mask. Also check FP types.
+
+define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_undefs(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]]
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %widecmp = shufflevector <3 x i1> %cmp, <3 x i1> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x float> %x, <4 x float> %y
+ %r = shufflevector <4 x float> %widesel, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+ ret <3 x float> %r
+}
+
+declare void @use(<4 x i8>)
+declare void @use_cmp(<4 x i1>)
+
+; Negative test - extra use would require more instructions than we started with.
+
+define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_use1(
+; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
+; CHECK-NEXT: call void @use(<4 x i8> [[WIDESEL]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
+ call void @use(<4 x i8> %widesel)
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
+; Negative test - extra use would require more instructions than we started with.
+
+define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_use2(
+; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]])
+; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ call void @use_cmp(<4 x i1> %widecmp)
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
+; Negative test - mismatched types would require extra shuffling.
+
+define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1(
+; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %x, <4 x i8> %y
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x i8> %r
+}
+
+; Negative test - mismatched types would require extra shuffling.
+
+define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2(
+; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[WIDESEL:%.*]] = select <6 x i1> [[WIDECMP]], <6 x i8> [[X:%.*]], <6 x i8> [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <6 x i8> [[WIDESEL]], <6 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %widecmp = shufflevector <4 x i1> %cmp, <4 x i1> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef>
+ %widesel = select <6 x i1> %widecmp, <6 x i8> %x, <6 x i8> %y
+ %r = shufflevector <6 x i8> %widesel, <6 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x i8> %r
+}
+
+; Narrowing constants does not require creating new narrowing shuffle instructions.
+
+define <2 x i8> @narrow_shuffle_of_select_consts(<2 x i1> %cmp) {
+; CHECK-LABEL: @narrow_shuffle_of_select_consts(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> <i8 -1, i8 -2>, <2 x i8> <i8 1, i8 2>
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> <i8 -1, i8 -2, i8 -3, i8 -4>, <4 x i8> <i8 1, i8 2, i8 3, i8 4>
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
+; PR38691 - https://bugs.llvm.org/show_bug.cgi?id=38691
+; If the operands are widened only to be narrowed back, then all of the shuffles are unnecessary.
+
+define <2 x i8> @narrow_shuffle_of_select_with_widened_ops(<2 x i1> %cmp, <2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @narrow_shuffle_of_select_with_widened_ops(
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widey = shufflevector <2 x i8> %y, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widecmp = shufflevector <2 x i1> %cmp, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %widesel = select <4 x i1> %widecmp, <4 x i8> %widex, <4 x i8> %widey
+ %r = shufflevector <4 x i8> %widesel, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
new file mode 100644
index 00000000000..6cda586f913
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -0,0 +1,1466 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
+; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806
+
+define <4 x i32> @add(<4 x i32> %v) {
+; CHECK-LABEL: @add(
+; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+; Propagate flags when possible.
+
+define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
+; CHECK-LABEL: @add_nuw_nsw(
+; CHECK-NEXT: [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @add_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
+ ret <4 x i32> %s
+}
+
+; Poison flags must be dropped, or undef must be replaced with a safe constant.
+
+define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+; Constant operand 0 (LHS) could work for some non-commutative binops?
+
+define <4 x i32> @sub(<4 x i32> %v) {
+; CHECK-LABEL: @sub(
+; CHECK-NEXT: [[B:%.*]] = sub <4 x i32> <i32 undef, i32 undef, i32 undef, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+; If any element of the shuffle mask operand is undef, that element of the result is undef.
+; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
+; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
+
+define <4 x i32> @mul(<4 x i32> %v) {
+; CHECK-LABEL: @mul(
+; CHECK-NEXT: [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @shl(<4 x i32> %v) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @shl_nsw(<4 x i32> %v) {
+; CHECK-LABEL: @shl_nsw(
+; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @shl_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @shl_nuw_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_constant_op0(
+; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_exact_constant_op0(
+; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_exact_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_constant_op1(
+; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+; Try weird types.
+
+define <3 x i32> @ashr(<3 x i32> %v) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
+; CHECK-NEXT: ret <3 x i32> [[S]]
+;
+ %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
+ %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
+ ret <3 x i32> %s
+}
+
+define <3 x i42> @and(<3 x i42> %v) {
+; CHECK-LABEL: @and(
+; CHECK-NEXT: [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
+; CHECK-NEXT: ret <3 x i42> [[S]]
+;
+ %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
+ %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
+ ret <3 x i42> %s
+}
+
+; It doesn't matter if the intermediate op has extra uses.
+
+declare void @use_v4i32(<4 x i32>)
+
+define <4 x i32> @or(<4 x i32> %v) {
+; CHECK-LABEL: @or(
+; CHECK-NEXT: [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
+; CHECK-NEXT: [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[B]])
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ call void @use_v4i32(<4 x i32> %b)
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @xor(<4 x i32> %v) {
+; CHECK-LABEL: @xor(
+; CHECK-NEXT: [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @udiv(<4 x i32> %v) {
+; CHECK-LABEL: @udiv(
+; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @udiv_exact(<4 x i32> %v) {
+; CHECK-LABEL: @udiv_exact(
+; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @udiv_undef_mask_elt(
+; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @udiv_exact_undef_mask_elt(
+; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @sdiv(<4 x i32> %v) {
+; CHECK-LABEL: @sdiv(
+; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @sdiv_exact(<4 x i32> %v) {
+; CHECK-LABEL: @sdiv_exact(
+; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ ret <4 x i32> %s
+}
+
+; Div/rem need special handling if the shuffle has undef elements.
+
+define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @sdiv_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
+; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @urem(<4 x i32> %v) {
+; CHECK-LABEL: @urem(
+; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
+; CHECK-LABEL: @urem_undef_mask_elt(
+; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %s
+}
+
+define <4 x i32> @srem(<4 x i32> %v) {
+; CHECK-LABEL: @srem(
+; CHECK-NEXT: [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
+ %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x i32> %s
+}
+
+; Try FP ops/types.
+
+define <4 x float> @fadd(<4 x float> %v) {
+; CHECK-LABEL: @fadd(
+; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[S]]
+;
+ %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
+ %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x float> %s
+}
+
+define <4 x double> @fsub(<4 x double> %v) {
+; CHECK-LABEL: @fsub(
+; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+ %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
+ %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %s
+}
+
+; Propagate any FMF.
+
+define <4 x float> @fmul(<4 x float> %v) {
+; CHECK-LABEL: @fmul(
+; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[S]]
+;
+ %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
+ %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %s
+}
+
+define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
+; CHECK-LABEL: @fdiv_constant_op0(
+; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> <double undef, double undef, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+ %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
+ %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %s
+}
+
+define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
+; CHECK-LABEL: @fdiv_constant_op1(
+; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+ %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
+ %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %s
+}
+
+define <4 x double> @frem(<4 x double> %v) {
+; CHECK-LABEL: @frem(
+; CHECK-NEXT: [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double undef, double undef>, [[V:%.*]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+ %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
+ %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x double> %s
+}
+
+; Tests where both operands of the shuffle are binops with the same opcode.
+
+define <4 x i32> @add_add(<4 x i32> %v0) {
+; CHECK-LABEL: @add_add(
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
+; CHECK-LABEL: @add_add_nsw(
+; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @add_add_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Poison flags must be dropped, or undef must be replaced with a safe constant.
+
+define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Constant operand 0 (LHS) also works.
+
+define <4 x i32> @sub_sub(<4 x i32> %v0) {
+; CHECK-LABEL: @sub_sub(
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
+; CHECK-LABEL: @sub_sub_nuw(
+; CHECK-NEXT: [[T3:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @sub_sub_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Poison flags must be dropped, or undef must be replaced with a safe constant.
+
+define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; If any element of the shuffle mask operand is undef, that element of the result is undef.
+; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
+
+define <4 x i32> @mul_mul(<4 x i32> %v0) {
+; CHECK-LABEL: @mul_mul(
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Preserve flags when possible.
+
+define <4 x i32> @shl_shl(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_shl(
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_shl_nuw(
+; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Shift by undef is poison. Undef must be replaced by a safe constant.
+
+define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_shl_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; Shift by undef is poison. Undef must be replaced by a safe constant.
+
+define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; Can't propagate the flag here.
+
+define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
+; CHECK-LABEL: @lshr_lshr(
+; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Try weird types.
+
+define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
+; CHECK-LABEL: @ashr_ashr(
+; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+ %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
+ %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
+ %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
+ ret <3 x i32> %t3
+}
+
+define <3 x i42> @and_and(<3 x i42> %v0) {
+; CHECK-LABEL: @and_and(
+; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
+; CHECK-NEXT: ret <3 x i42> [[T3]]
+;
+ %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
+ %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
+ %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
+ ret <3 x i42> %t3
+}
+
+; It doesn't matter if the intermediate ops have extra uses.
+
+define <4 x i32> @or_or(<4 x i32> %v0) {
+; CHECK-LABEL: @or_or(
+; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ call void @use_v4i32(<4 x i32> %t1)
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @xor_xor(<4 x i32> %v0) {
+; CHECK-LABEL: @xor_xor(
+; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: [[T3:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ call void @use_v4i32(<4 x i32> %t2)
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
+; CHECK-LABEL: @udiv_udiv(
+; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
+; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ call void @use_v4i32(<4 x i32> %t1)
+ call void @use_v4i32(<4 x i32> %t2)
+ ret <4 x i32> %t3
+}
+
+; Div/rem need special handling if the shuffle has undef elements.
+
+define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
+; CHECK-LABEL: @sdiv_sdiv(
+; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
+; CHECK-LABEL: @sdiv_sdiv_exact(
+; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @urem_urem(<4 x i32> %v0) {
+; CHECK-LABEL: @urem_urem(
+; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %t3
+}
+
+; This is folded by using a safe constant.
+
+define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @urem_urem_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @srem_srem(<4 x i32> %v0) {
+; CHECK-LABEL: @srem_srem(
+; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x i32> %t3
+}
+
+; This is folded by using a safe constant.
+
+define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
+; CHECK-LABEL: @srem_srem_undef_mask_elt(
+; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Try FP ops/types.
+
+define <4 x float> @fadd_fadd(<4 x float> %v0) {
+; CHECK-LABEL: @fadd_fadd(
+; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x float> %t3
+}
+
+define <4 x double> @fsub_fsub(<4 x double> %v0) {
+; CHECK-LABEL: @fsub_fsub(
+; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+; Intersect any FMF.
+
+define <4 x float> @fmul_fmul(<4 x float> %v0) {
+; CHECK-LABEL: @fmul_fmul(
+; CHECK-NEXT: [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %t3
+}
+
+define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
+; CHECK-LABEL: @fdiv_fdiv(
+; CHECK-NEXT: [[T3:%.*]] = fdiv nnan arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+; The variable operand must be either the first operand or second operand in both binops.
+
+define <4 x double> @frem_frem(<4 x double> %v0) {
+; CHECK-LABEL: @frem_frem(
+; CHECK-NEXT: [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = frem <4 x double> [[V0]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @add_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Constant operand 0 (LHS) also works.
+
+define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sub_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sub_2_vars_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Poison flags must be dropped, or undef must be replaced with a safe constant.
+
+define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; If any element of the shuffle mask operand is undef, that element of the result is undef.
+; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
+
+define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_2_vars_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Poison flags must be dropped, or undef must be replaced with a safe constant.
+
+define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Preserve flags when possible.
+
+define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_2_vars_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Shift by undef is poison. Undef is replaced by a safe constant.
+
+define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; Shift by undef is poison. Undef is replaced by a safe constant.
+
+define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; Can't propagate the flag here.
+
+define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @lshr_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @lshr_2_vars_exact(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
+
+define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
+
+define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
+; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Try weird types.
+
+define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
+; CHECK-LABEL: @ashr_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
+; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+ %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
+ %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
+ %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
+ ret <3 x i32> %t3
+}
+
+define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
+; CHECK-LABEL: @and_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
+; CHECK-NEXT: ret <3 x i42> [[T3]]
+;
+ %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
+ %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
+ %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
+ ret <3 x i42> %t3
+}
+
+; It doesn't matter if only one intermediate op has extra uses.
+
+define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @or_2_vars(
+; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ call void @use_v4i32(<4 x i32> %t1)
+ ret <4 x i32> %t3
+}
+
+; But we don't transform if both intermediate values have extra uses.
+
+define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @xor_2_vars(
+; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
+; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+ call void @use_v4i32(<4 x i32> %t1)
+ call void @use_v4i32(<4 x i32> %t2)
+ ret <4 x i32> %t3
+}
+
+; Div/rem need special handling if the shuffle has undef elements.
+
+define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @udiv_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @udiv_2_vars_exact(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; TODO: This could be transformed using a safe constant.
+
+define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; TODO: This could be transformed using a safe constant.
+
+define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
+; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t3
+}
+
+; If the shuffle has no undefs, it's safe to shuffle the variables first.
+
+define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sdiv_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sdiv_2_vars_exact(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Div by undef is UB. Undef is replaced by a safe constant.
+
+define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; Div by undef is UB. Undef is replaced by a safe constant.
+
+define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+ ret <4 x i32> %t3
+}
+
+; If the shuffle has no undefs, it's safe to shuffle the variables first.
+
+define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @urem_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @srem_2_vars(
+; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Try FP ops/types.
+
+define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
+; CHECK-LABEL: @fadd_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x float> %t3
+}
+
+define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @fsub_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+; Intersect any FMF.
+
+define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
+; CHECK-LABEL: @fmul_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT: ret <4 x float> [[T3]]
+;
+ %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %t3
+}
+
+define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @frem_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = frem nnan <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+; The variable operand must be either the first or the second operand in both binops.
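+; In the test below %v0 is the divisor of the first fdiv while %v1 is the
+; dividend of the second, so the operands cannot be lined up; only the unused
+; constant lanes get turned into undef.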
+
+define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
+; CHECK-LABEL: @fdiv_2_vars(
+; CHECK-NEXT: [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double undef, double undef>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double undef, double undef, double 7.000000e+00, double 8.000000e+00>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x double> [[T3]]
+;
+ %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
+ %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
+ %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x double> %t3
+}
+
+; Shift-left with constant shift amount can be converted to mul to enable the fold.
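+; A shift left by 5 or 6 is a multiply by 32 or 64, so the lanes taken from the
+; shl and the lanes taken from the mul can be expressed as the single
+; 'mul ... <32, 64, 3, 4>' shown below.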
+
+define <4 x i32> @mul_shl(<4 x i32> %v0) {
+; CHECK-LABEL: @mul_shl(
+; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
+
+define <4 x i32> @shl_mul(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_mul(
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Demanded elements + simplification can remove the mul alone, but that's not the best case.
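+; Lane 0 of the mul multiplies by 1, so after simplification that lane is just
+; %v0; the result below still collapses to a single shl, using a shift amount
+; of 0 for that lane.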
+
+define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
+; CHECK-LABEL: @mul_is_nop_shl(
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x i32> %t3
+}
+
+; Negative test: shift amount (operand 1) must be constant.
+
+define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
+; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
+; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
+; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 undef, i32 undef>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
+ %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Try with 2 variable inputs.
+
+define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_shl_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_mul_2_vars(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+ %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Or with constant can be converted to add to enable the fold.
+; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
+; TODO: The 'or' constant is limited to a splat.
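+; After the shl by 5 the low 5 bits of %v0 are known zero, so 'or %v0, 31' sets
+; only known-zero bits and is equivalent to 'add %v0, 31'; that is why the
+; selected lanes can be merged into the single add by <31, 31, 65536, 65537>.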
+
+define <4 x i32> @add_or(<4 x i32> %v) {
+; CHECK-LABEL: @add_or(
+; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
+ %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
+ %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+; Try with 'or' as operand 0 of the shuffle.
+
+define <4 x i8> @or_add(<4 x i8> %v) {
+; CHECK-LABEL: @or_add(
+; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
+; CHECK-NEXT: ret <4 x i8> [[T3]]
+;
+ %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
+ %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
+ %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
+ %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i8> %t3
+}
+
+; Negative test: not all 'or' insts can be converted to 'add'.
+
+define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
+; CHECK-LABEL: @or_add_not_enough_masking(
+; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]], <i8 undef, i8 undef, i8 -64, i8 -64>
+; CHECK-NEXT: [[T2:%.*]] = add <4 x i8> [[V0]], <i8 1, i8 2, i8 undef, i8 undef>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x i8> [[T3]]
+;
+ %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1> ; does not clear enough top bits
+ %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
+ %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
+ %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i8> %t3
+}
+
+; Try with 2 variable inputs.
+
+define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
+; CHECK-LABEL: @add_or_2_vars(
+; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
+; CHECK-NEXT: ret <4 x i32> [[T3]]
+;
+ %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5> ; clear the bottom bits
+ %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537> ; this can't be converted to 'or'
+ %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> ; set the bottom bits
+ %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i32> %t3
+}
+
+define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
+; CHECK-LABEL: @or_add_2_vars(
+; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
+; CHECK-NEXT: ret <4 x i8> [[T3]]
+;
+ %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3> ; clear the top bits
+ %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192> ; set some top bits
+ %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4> ; this can't be converted to 'or'
+ %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x i8> %t3
+}
+
+; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
+
+define <4 x i32> @PR41419(<4 x i32> %v) {
+; CHECK-LABEL: @PR41419(
+; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
+;
+ %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %s
+}
+
diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
new file mode 100644
index 00000000000..0f0365a07fb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(<16 x i8> %w, i32* %o1, float* %o2) {
+
+; CHECK: %v.bc = bitcast <16 x i8> %w to <4 x i32>
+; CHECK-NEXT: %v.extract = extractelement <4 x i32> %v.bc, i32 3
+; CHECK-NEXT: %v.bc{{[0-9]*}} = bitcast <16 x i8> %w to <4 x float>
+; CHECK-NEXT: %v.extract{{[0-9]*}} = extractelement <4 x float> %v.bc{{[0-9]*}}, i32 3
+
+ %v = shufflevector <16 x i8> %w, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ %f = bitcast <4 x i8> %v to float
+ %i = bitcast <4 x i8> %v to i32
+ store i32 %i, i32* %o1, align 4
+ store float %f, float* %o2, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/shufflevec-constant.ll b/llvm/test/Transforms/InstCombine/shufflevec-constant.ll
new file mode 100644
index 00000000000..37efba1f5c1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shufflevec-constant.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+
+define <4 x float> @__inff4() nounwind readnone {
+; CHECK-LABEL: @__inff4(
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>
+;
+ %tmp14 = extractelement <1 x double> bitcast (<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000> to <1 x double>), i32 0
+ %tmp4 = bitcast double %tmp14 to i64
+ %tmp3 = bitcast i64 %tmp4 to <2 x float>
+ %tmp8 = shufflevector <2 x float> %tmp3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %tmp9 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp8, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x float> %tmp9
+}
diff --git a/llvm/test/Transforms/InstCombine/sign-test-and-or.ll b/llvm/test/Transforms/InstCombine/sign-test-and-or.ll
new file mode 100644
index 00000000000..1920a800ef1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @foo()
+
+define i1 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = icmp slt i32 %a, 0
+ %2 = icmp slt i32 %b, 0
+ %or.cond = or i1 %1, %2
+ ret i1 %or.cond
+}
+
+define i1 @test2(i32 %a, i32 %b) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = icmp sgt i32 %a, -1
+ %2 = icmp sgt i32 %b, -1
+ %or.cond = or i1 %1, %2
+ ret i1 %or.cond
+}
+
+define i1 @test3(i32 %a, i32 %b) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = icmp slt i32 %a, 0
+ %2 = icmp slt i32 %b, 0
+ %or.cond = and i1 %1, %2
+ ret i1 %or.cond
+}
+
+define i1 @test4(i32 %a, i32 %b) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = icmp sgt i32 %a, -1
+ %2 = icmp sgt i32 %b, -1
+ %or.cond = and i1 %1, %2
+ ret i1 %or.cond
+}
+
+define void @test5(i32 %a) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %if.then, label %if.end
+;
+ %and = and i32 %a, 134217728
+ %1 = icmp eq i32 %and, 0
+ %2 = icmp sgt i32 %a, -1
+ %or.cond = and i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test6(i32 %a) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %if.then, label %if.end
+;
+ %1 = icmp sgt i32 %a, -1
+ %and = and i32 %a, 134217728
+ %2 = icmp eq i32 %and, 0
+ %or.cond = and i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test7(i32 %a) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %if.end, label %if.then
+;
+ %and = and i32 %a, 134217728
+ %1 = icmp ne i32 %and, 0
+ %2 = icmp slt i32 %a, 0
+ %or.cond = or i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test8(i32 %a) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -2013265920
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %if.end, label %if.then
+;
+ %1 = icmp slt i32 %a, 0
+ %and = and i32 %a, 134217728
+ %2 = icmp ne i32 %and, 0
+ %or.cond = or i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+
+if.then:
+ tail call void @foo()
+ ret void
+
+if.end:
+ ret void
+}
+
+define i1 @test9(i32 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, -1073741824
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1073741824
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = and i32 %a, 1073741824
+ %2 = icmp ne i32 %1, 0
+ %3 = icmp sgt i32 %a, -1
+ %or.cond = and i1 %2, %3
+ ret i1 %or.cond
+}
+
+define i1 @test10(i32 %a) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %a, 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = and i32 %a, 2
+ %2 = icmp eq i32 %1, 0
+ %3 = icmp ult i32 %a, 4
+ %or.cond = and i1 %2, %3
+ ret i1 %or.cond
+}
+
+define i1 @test11(i32 %a) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %a, 1
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = and i32 %a, 2
+ %2 = icmp ne i32 %1, 0
+ %3 = icmp ugt i32 %a, 3
+ %or.cond = or i1 %2, %3
+ ret i1 %or.cond
+}
diff --git a/llvm/test/Transforms/InstCombine/signed-comparison.ll b/llvm/test/Transforms/InstCombine/signed-comparison.ll
new file mode 100644
index 00000000000..1fbfc2d1463
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/signed-comparison.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Convert the zext+slt into a simple ult.
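+; The zext result is always in [0, 65535] and therefore non-negative, so a
+; signed compare against 500 behaves exactly like an unsigned compare, and it
+; can be performed directly on the original i16 value.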
+
+define i1 @scalar_zext_slt(i16 %t4) {
+; CHECK-LABEL: @scalar_zext_slt(
+; CHECK-NEXT: [[T6:%.*]] = icmp ult i16 %t4, 500
+; CHECK-NEXT: ret i1 [[T6]]
+;
+ %t5 = zext i16 %t4 to i32
+ %t6 = icmp slt i32 %t5, 500
+ ret i1 %t6
+}
+
+define <4 x i1> @vector_zext_slt(<4 x i16> %t4) {
+; CHECK-LABEL: @vector_zext_slt(
+; CHECK-NEXT: [[T6:%.*]] = icmp ult <4 x i16> %t4, <i16 500, i16 0, i16 501, i16 -1>
+; CHECK-NEXT: ret <4 x i1> [[T6]]
+;
+ %t5 = zext <4 x i16> %t4 to <4 x i32>
+ %t6 = icmp slt <4 x i32> %t5, <i32 500, i32 0, i32 501, i32 65535>
+ ret <4 x i1> %t6
+}
+
diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
new file mode 100644
index 00000000000..a69129cbcd2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
@@ -0,0 +1,621 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; General pattern:
+; X & Y
+;
+; Where Y is checking that all the high bits (covered by a mask 4294967168)
+; are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
+; Pattern can be one of:
+; %t = add i32 %arg, 128
+; %r = icmp ult i32 %t, 256
+; Or
+; %t0 = shl i32 %arg, 24
+; %t1 = ashr i32 %t0, 24
+; %r = icmp eq i32 %t1, %arg
+; Or
+; %t0 = trunc i32 %arg to i8
+; %t1 = sext i8 %t0 to i32
+; %r = icmp eq i32 %t1, %arg
+; This pattern is a signed truncation check.
+;
+; And X is checking that some bit in that same mask is zero.
+; I.e. can be one of:
+; %r = icmp sgt i32 %arg, -1
+; Or
+; %t = and i32 %arg, 2147483648
+; %r = icmp eq i32 %t, 0
+;
+; Since we are checking that all the bits in that mask are the same,
+; and a particular bit is zero, what we are really checking is that all the
+; masked bits are zero.
+; So this should be transformed to:
+; %r = icmp ult i32 %arg, 128
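+; For instance, with an i8 payload in an i32: %arg = 127 gives 127 + 128 = 255,
+; which passes the 'ult 256' check, and 127 is also 'sgt -1', so the combined
+; result is true, just like '127 ult 128'. %arg = 128 gives 128 + 128 = 256,
+; which already fails the 'ult 256' check, matching '128 ult 128' being false.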
+
+; ============================================================================ ;
+; Basic positive test
+; ============================================================================ ;
+
+define i1 @positive_with_signbit(i32 %arg) {
+; CHECK-LABEL: @positive_with_signbit(
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 128
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+define i1 @positive_with_mask(i32 %arg) {
+; CHECK-LABEL: @positive_with_mask(
+; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]]
+;
+ %t1 = and i32 %arg, 1107296256
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @positive_with_icmp(i32 %arg) {
+; CHECK-LABEL: @positive_with_icmp(
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %t1 = icmp ult i32 %arg, 512
+ %t2 = add i32 %arg, 128
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+; Still the same
+define i1 @positive_with_aggressive_icmp(i32 %arg) {
+; CHECK-LABEL: @positive_with_aggressive_icmp(
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %t1 = icmp ult i32 %arg, 128
+ %t2 = add i32 %arg, 256
+ %t3 = icmp ult i32 %t2, 512
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+; I'm sure there are a bunch more possible patterns :/
+
+; This used to trigger an assert, because the icmps are not direct
+; operands of the and.
+define i1 @positive_with_extra_and(i32 %arg, i1 %z) {
+; CHECK-LABEL: @positive_with_extra_and(
+; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[T5_SIMPLIFIED]], [[Z:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 128
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %z
+ %t5 = and i1 %t3, %t4
+ ret i1 %t5
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @positive_vec_splat(<2 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_splat(
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], <i32 128, i32 128>
+; CHECK-NEXT: ret <2 x i1> [[T4_SIMPLIFIED]]
+;
+ %t1 = icmp sgt <2 x i32> %arg, <i32 -1, i32 -1>
+ %t2 = add <2 x i32> %arg, <i32 128, i32 128>
+ %t3 = icmp ult <2 x i32> %t2, <i32 256, i32 256>
+ %t4 = and <2 x i1> %t1, %t3
+ ret <2 x i1> %t4
+}
+
+define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_nonsplat(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[ARG]], <i32 128, i32 256>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 256, i32 512>
+; CHECK-NEXT: [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <2 x i1> [[T4]]
+;
+ %t1 = icmp sgt <2 x i32> %arg, <i32 -1, i32 -1>
+ %t2 = add <2 x i32> %arg, <i32 128, i32 256>
+ %t3 = icmp ult <2 x i32> %t2, <i32 256, i32 512>
+ %t4 = and <2 x i1> %t1, %t3
+ ret <2 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef0(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef0(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 256, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 undef, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 128, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 256, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef1(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef1(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 undef, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 256, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 undef, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 256, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef2(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef2(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 undef, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 128, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 undef, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef3(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef3(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 undef, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 256, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 undef, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 undef, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 256, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef4(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef4(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 undef, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 undef, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 128, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 undef, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef5(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef5(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 undef, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 undef, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 undef, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 undef, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+define <3 x i1> @positive_vec_undef6(<3 x i32> %arg) {
+; CHECK-LABEL: @positive_vec_undef6(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt <3 x i32> [[ARG:%.*]], <i32 -1, i32 undef, i32 -1>
+; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[ARG]], <i32 128, i32 undef, i32 128>
+; CHECK-NEXT: [[T3:%.*]] = icmp ult <3 x i32> [[T2]], <i32 256, i32 undef, i32 256>
+; CHECK-NEXT: [[T4:%.*]] = and <3 x i1> [[T1]], [[T3]]
+; CHECK-NEXT: ret <3 x i1> [[T4]]
+;
+ %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 undef, i32 -1>
+ %t2 = add <3 x i32> %arg, <i32 128, i32 undef, i32 128>
+ %t3 = icmp ult <3 x i32> %t2, <i32 256, i32 undef, i32 256>
+ %t4 = and <3 x i1> %t1, %t3
+ ret <3 x i1> %t4
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i32 @gen32()
+
+define i1 @commutative() {
+; CHECK-LABEL: @commutative(
+; CHECK-NEXT: [[ARG:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %arg = call i32 @gen32()
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 128
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t3, %t1 ; swapped order
+ ret i1 %t4
+}
+
+define i1 @commutative_with_icmp() {
+; CHECK-LABEL: @commutative_with_icmp(
+; CHECK-NEXT: [[ARG:%.*]] = call i32 @gen32()
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %arg = call i32 @gen32()
+ %t1 = icmp ult i32 %arg, 512
+ %t2 = add i32 %arg, 128
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t3, %t1 ; swapped order
+ ret i1 %t4
+}
+
+; ============================================================================ ;
+; Truncations.
+; ============================================================================ ;
+
+define i1 @positive_trunc_signbit(i32 %arg) {
+; CHECK-LABEL: @positive_trunc_signbit(
+; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]]
+;
+ %t1 = trunc i32 %arg to i8
+ %t2 = icmp sgt i8 %t1, -1
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @positive_trunc_base(i32 %arg) {
+; CHECK-LABEL: @positive_trunc_base(
+; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i16
+; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i16 [[T1]], 128
+; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]]
+;
+ %t1 = trunc i32 %arg to i16
+ %t2 = icmp sgt i16 %t1, -1
+ %t3 = add i16 %t1, 128
+ %t4 = icmp ult i16 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @positive_different_trunc_both(i32 %arg) {
+; CHECK-LABEL: @positive_different_trunc_both(
+; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i15
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i15 [[T1]], -1
+; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[ARG]] to i16
+; CHECK-NEXT: [[T4:%.*]] = add i16 [[T3]], 128
+; CHECK-NEXT: [[T5:%.*]] = icmp ult i16 [[T4]], 256
+; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]]
+; CHECK-NEXT: ret i1 [[T6]]
+;
+ %t1 = trunc i32 %arg to i15
+ %t2 = icmp sgt i15 %t1, -1
+ %t3 = trunc i32 %arg to i16
+ %t4 = add i16 %t3, 128
+ %t5 = icmp ult i16 %t4, 256
+ %t6 = and i1 %t2, %t5
+ ret i1 %t6
+}
+
+; ============================================================================ ;
+; One-use tests.
+;
+; We will only produce one instruction, so we do not care about one-use.
+; But we *could* handle more patterns that we weren't able to canonicalize
+; because of extra uses.
+; ============================================================================ ;
+
+declare void @use32(i32)
+declare void @use8(i8)
+declare void @use1(i1)
+
+define i1 @oneuse_with_signbit(i32 %arg) {
+; CHECK-LABEL: @oneuse_with_signbit(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
+; CHECK-NEXT: call void @use1(i1 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: call void @use32(i32 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256
+; CHECK-NEXT: call void @use1(i1 [[T3]])
+; CHECK-NEXT: [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT: ret i1 [[T4_SIMPLIFIED]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ call void @use1(i1 %t1)
+ %t2 = add i32 %arg, 128
+ call void @use32(i32 %t2)
+ %t3 = icmp ult i32 %t2, 256
+ call void @use1(i1 %t3)
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+define i1 @oneuse_with_mask(i32 %arg) {
+; CHECK-LABEL: @oneuse_with_mask(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 603979776
+; CHECK-NEXT: call void @use32(i32 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: call void @use32(i32 [[T3]])
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: call void @use1(i1 [[T4]])
+; CHECK-NEXT: [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT: ret i1 [[T5_SIMPLIFIED]]
+;
+ %t1 = and i32 %arg, 603979776 ; some bit within the target 4294967168 mask.
+ call void @use32(i32 %t1)
+ %t2 = icmp eq i32 %t1, 0
+ call void @use1(i1 %t2)
+ %t3 = add i32 %arg, 128
+ call void @use32(i32 %t3)
+ %t4 = icmp ult i32 %t3, 256
+ call void @use1(i1 %t4)
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @oneuse_shl_ashr(i32 %arg) {
+; CHECK-LABEL: @oneuse_shl_ashr(
+; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[ARG]], 24
+; CHECK-NEXT: call void @use32(i32 [[T3]])
+; CHECK-NEXT: [[T4:%.*]] = ashr exact i32 [[T3]], 24
+; CHECK-NEXT: call void @use32(i32 [[T4]])
+; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], [[ARG]]
+; CHECK-NEXT: call void @use1(i1 [[T5]])
+; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]]
+; CHECK-NEXT: ret i1 [[T6]]
+;
+ %t1 = trunc i32 %arg to i8
+ call void @use8(i8 %t1)
+ %t2 = icmp sgt i8 %t1, -1
+ call void @use1(i1 %t2)
+ %t3 = shl i32 %arg, 24
+ call void @use32(i32 %t3)
+ %t4 = ashr i32 %t3, 24
+ call void @use32(i32 %t4)
+ %t5 = icmp eq i32 %t4, %arg
+ call void @use1(i1 %t5)
+ %t6 = and i1 %t2, %t5
+ ret i1 %t6
+}
+
+define zeroext i1 @oneuse_trunc_sext(i32 %arg) {
+; CHECK-LABEL: @oneuse_trunc_sext(
+; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8
+; CHECK-NEXT: call void @use8(i8 [[T1]])
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1
+; CHECK-NEXT: call void @use1(i1 [[T2]])
+; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[ARG]] to i8
+; CHECK-NEXT: call void @use8(i8 [[T3]])
+; CHECK-NEXT: [[T4:%.*]] = sext i8 [[T3]] to i32
+; CHECK-NEXT: call void @use32(i32 [[T4]])
+; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], [[ARG]]
+; CHECK-NEXT: call void @use1(i1 [[T5]])
+; CHECK-NEXT: [[T6:%.*]] = and i1 [[T2]], [[T5]]
+; CHECK-NEXT: ret i1 [[T6]]
+;
+ %t1 = trunc i32 %arg to i8
+ call void @use8(i8 %t1)
+ %t2 = icmp sgt i8 %t1, -1
+ call void @use1(i1 %t2)
+ %t3 = trunc i32 %arg to i8
+ call void @use8(i8 %t3)
+ %t4 = sext i8 %t3 to i32
+ call void @use32(i32 %t4)
+ %t5 = icmp eq i32 %t4, %arg
+ call void @use1(i1 %t5)
+ %t6 = and i1 %t2, %t5
+ ret i1 %t6
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @negative_not_arg(i32 %arg, i32 %arg2) {
+; CHECK-LABEL: @negative_not_arg(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
+; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG2:%.*]], 128
+; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256
+; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]]
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg2, 128 ; not %arg
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+define i1 @negative_trunc_not_arg(i32 %arg, i32 %arg2) {
+; CHECK-LABEL: @negative_trunc_not_arg(
+; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8
+; CHECK-NEXT: [[T2:%.*]] = icmp sgt i8 [[T1]], -1
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG2:%.*]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = trunc i32 %arg to i8
+ %t2 = icmp sgt i8 %t1, -1
+ %t3 = add i32 %arg2, 128 ; not %arg
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @positive_with_mask_not_arg(i32 %arg, i32 %arg2) {
+; CHECK-LABEL: @positive_with_mask_not_arg(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1140850688
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG2:%.*]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = and i32 %arg, 1140850688
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = add i32 %arg2, 128 ; not %arg
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @negative_with_nonuniform_bad_mask(i32 %arg) {
+; CHECK-LABEL: @negative_with_nonuniform_bad_mask(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1711276033
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = and i32 %arg, 1711276033 ; lowest bit is set
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @negative_with_uniform_bad_mask(i32 %arg) {
+; CHECK-LABEL: @negative_with_uniform_bad_mask(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], -16777152
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = and i32 %arg, 4278190144 ; 7th bit is set
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @negative_with_wrong_mask(i32 %arg) {
+; CHECK-LABEL: @negative_with_wrong_mask(
+; CHECK-NEXT: [[T1:%.*]] = and i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = and i32 %arg, 1 ; not even checking the right mask
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @negative_not_less_than(i32 %arg) {
+; CHECK-LABEL: @negative_not_less_than(
+; CHECK-NEXT: ret i1 false
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 256 ; should be less than 256
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+define i1 @negative_not_power_of_two(i32 %arg) {
+; CHECK-LABEL: @negative_not_power_of_two(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
+; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 255
+; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256
+; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]]
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 255 ; should be a power of two
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+define i1 @negative_not_next_power_of_two(i32 %arg) {
+; CHECK-LABEL: @negative_not_next_power_of_two(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
+; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 64
+; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256
+; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]]
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t1 = icmp sgt i32 %arg, -1
+ %t2 = add i32 %arg, 64 ; should be 256 >> 1
+ %t3 = icmp ult i32 %t2, 256
+ %t4 = and i1 %t1, %t3
+ ret i1 %t4
+}
+
+; I don't think this can be folded, at least not into a single instruction.
+define i1 @two_signed_truncation_checks(i32 %arg) {
+; CHECK-LABEL: @two_signed_truncation_checks(
+; CHECK-NEXT: [[T1:%.*]] = add i32 [[ARG:%.*]], 512
+; CHECK-NEXT: [[T2:%.*]] = icmp ult i32 [[T1]], 1024
+; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = add i32 %arg, 512
+ %t2 = icmp ult i32 %t1, 1024
+ %t3 = add i32 %arg, 128
+ %t4 = icmp ult i32 %t3, 256
+ %t5 = and i1 %t2, %t4
+ ret i1 %t5
+}
+
+define i1 @bad_trunc_stc(i32 %arg) {
+; CHECK-LABEL: @bad_trunc_stc(
+; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
+; CHECK-NEXT: [[T2:%.*]] = trunc i32 [[ARG]] to i16
+; CHECK-NEXT: [[T3:%.*]] = add i16 [[T2]], 128
+; CHECK-NEXT: [[T4:%.*]] = icmp ult i16 [[T3]], 256
+; CHECK-NEXT: [[T5:%.*]] = and i1 [[T1]], [[T4]]
+; CHECK-NEXT: ret i1 [[T5]]
+;
+ %t1 = icmp sgt i32 %arg, -1 ; checks a bit outside of the i16
+ %t2 = trunc i32 %arg to i16
+ %t3 = add i16 %t2, 128
+ %t4 = icmp ult i16 %t3, 256
+ %t5 = and i1 %t1, %t4
+ ret i1 %t5
+}
diff --git a/llvm/test/Transforms/InstCombine/signext.ll b/llvm/test/Transforms/InstCombine/signext.ll
new file mode 100644
index 00000000000..df484ca24f8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/signext.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "n8:16:32:64"
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 16
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 65535
+ %tmp.2 = xor i32 %tmp.1, -32768
+ %tmp.3 = add i32 %tmp.2, 32768
+ ret i32 %tmp.3
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 16
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 65535
+ %tmp.2 = xor i32 %tmp.1, 32768
+ %tmp.3 = add i32 %tmp.2, -32768
+ ret i32 %tmp.3
+}
+
+define i32 @test3(i16 %P) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP_5:%.*]] = sext i16 %P to i32
+; CHECK-NEXT: ret i32 [[TMP_5]]
+;
+ %tmp.1 = zext i16 %P to i32
+ %tmp.4 = xor i32 %tmp.1, 32768
+ %tmp.5 = add i32 %tmp.4, -32768
+ ret i32 %tmp.5
+}
+
+define i32 @test4(i32 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 24
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 24
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 255
+ %tmp.2 = xor i32 %tmp.1, 128
+ %tmp.3 = add i32 %tmp.2, -128
+ ret i32 %tmp.3
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP_2:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_4:%.*]] = ashr exact i32 [[TMP_2]], 16
+; CHECK-NEXT: ret i32 [[TMP_4]]
+;
+ %tmp.2 = shl i32 %x, 16
+ %tmp.4 = ashr i32 %tmp.2, 16
+ ret i32 %tmp.4
+}
+
+; If the shift amount equals the difference in width of the destination
+; and source scalar types:
+; ashr (shl (zext X), C), C --> sext X
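+; For i16 -> i32 with C = 16, the shl moves the 16 payload bits to the top of
+; the i32 and the ashr brings them back down while replicating the payload's
+; sign bit through the upper half, which is exactly what 'sext i16 to i32' does.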
+
+define i32 @test6(i16 %P) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP_5:%.*]] = sext i16 %P to i32
+; CHECK-NEXT: ret i32 [[TMP_5]]
+;
+ %tmp.1 = zext i16 %P to i32
+ %sext1 = shl i32 %tmp.1, 16
+ %tmp.5 = ashr i32 %sext1, 16
+ ret i32 %tmp.5
+}
+
+; Vectors should get the same fold as above.
+
+define <2 x i32> @test6_splat_vec(<2 x i12> %P) {
+; CHECK-LABEL: @test6_splat_vec(
+; CHECK-NEXT: [[ASHR:%.*]] = sext <2 x i12> %P to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[ASHR]]
+;
+ %z = zext <2 x i12> %P to <2 x i32>
+ %shl = shl <2 x i32> %z, <i32 20, i32 20>
+ %ashr = ashr <2 x i32> %shl, <i32 20, i32 20>
+ ret <2 x i32> %ashr
+}
+
+define i32 @test7(i32 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[SUB:%.*]] = ashr i32 %x, 5
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %shr = lshr i32 %x, 5
+ %xor = xor i32 %shr, 67108864
+ %sub = add i32 %xor, -67108864
+ ret i32 %sub
+}
+
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
new file mode 100644
index 00000000000..db8f17917d1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -disable-output
+
+; SimplifyDemandedBits should cope with pointer types.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+ %struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
+ %struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
+ %struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
+ %struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
+ %struct.omp_clause_subcode = type { i32 }
+ %struct.rtunion = type { i8* }
+ %struct.rtx_def = type { i16, i8, i8, %struct.u }
+ %struct.section = type { %struct.unnamed_section }
+ %struct.u = type { %struct.block_symbol }
+ %struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
+
+define fastcc void @cse_insn(%struct.rtx_def* %insn, %struct.rtx_def* %libcall_insn) nounwind {
+entry:
+ br i1 undef, label %bb43, label %bb88
+
+bb43: ; preds = %entry
+ br label %bb88
+
+bb88: ; preds = %bb43, %entry
+ br i1 undef, label %bb95, label %bb107
+
+bb95: ; preds = %bb88
+ unreachable
+
+bb107: ; preds = %bb88
+ %0 = load i16, i16* undef, align 8 ; <i16> [#uses=1]
+ %1 = icmp eq i16 %0, 38 ; <i1> [#uses=1]
+ %src_eqv_here.0 = select i1 %1, %struct.rtx_def* null, %struct.rtx_def* null ; <%struct.rtx_def*> [#uses=1]
+ br i1 undef, label %bb127, label %bb125
+
+bb125: ; preds = %bb107
+ br i1 undef, label %bb127, label %bb126
+
+bb126: ; preds = %bb125
+ br i1 undef, label %bb129, label %bb133
+
+bb127: ; preds = %bb125, %bb107
+ unreachable
+
+bb129: ; preds = %bb126
+ br label %bb133
+
+bb133: ; preds = %bb129, %bb126
+ br i1 undef, label %bb134, label %bb146
+
+bb134: ; preds = %bb133
+ unreachable
+
+bb146: ; preds = %bb133
+ br i1 undef, label %bb180, label %bb186
+
+bb180: ; preds = %bb146
+ %2 = icmp eq %struct.rtx_def* null, null ; <i1> [#uses=1]
+ %3 = zext i1 %2 to i8 ; <i8> [#uses=1]
+ %4 = icmp ne %struct.rtx_def* %src_eqv_here.0, null ; <i1> [#uses=1]
+ %5 = zext i1 %4 to i8 ; <i8> [#uses=1]
+ %toBool181 = icmp ne i8 %3, 0 ; <i1> [#uses=1]
+ %toBool182 = icmp ne i8 %5, 0 ; <i1> [#uses=1]
+ %6 = and i1 %toBool181, %toBool182 ; <i1> [#uses=1]
+ %7 = zext i1 %6 to i8 ; <i8> [#uses=1]
+ %toBool183 = icmp ne i8 %7, 0 ; <i1> [#uses=1]
+ br i1 %toBool183, label %bb184, label %bb186
+
+bb184: ; preds = %bb180
+ br i1 undef, label %bb185, label %bb186
+
+bb185: ; preds = %bb184
+ br label %bb186
+
+bb186: ; preds = %bb185, %bb184, %bb180, %bb146
+ br i1 undef, label %bb190, label %bb195
+
+bb190: ; preds = %bb186
+ unreachable
+
+bb195: ; preds = %bb186
+ unreachable
+}
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-erased.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-erased.ll
new file mode 100644
index 00000000000..19cfcf8eba9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-erased.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target triple = "x86_64"
+
+define double @pow_exp(double %x, double %y) {
+; CHECK-LABEL: @pow_exp(
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[EXP:%.*]] = call fast double @llvm.exp.f64(double [[MUL]])
+; CHECK-NEXT: ret double [[EXP]]
+;
+ %A = alloca i1
+ %call = call fast double @exp(double %x) #1
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ %C1 = fcmp ule double %call, %pow
+ store i1 %C1, i1* %A
+ ret double %pow
+}
+
+declare double @exp(double)
+
+declare double @llvm.pow.f64(double, double) #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
new file mode 100644
index 00000000000..7f198c36907
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -0,0 +1,180 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+
+@G = constant [3 x i8] c"%s\00" ; <[3 x i8]*> [#uses=1]
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+define void @foo(i8* %P, i32* %X) {
+ call i32 (i8*, i8*, ...) @sprintf( i8* %P, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32* %X ) ; <i32>:1 [#uses=0]
+ ret void
+}
+
+; PR1307
+@str = internal constant [5 x i8] c"foog\00"
+@str1 = internal constant [8 x i8] c"blahhh!\00"
+@str2 = internal constant [5 x i8] c"Ponk\00"
+
+define i8* @test1() {
+ %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8], [5 x i8]* @str, i32 0, i32 2), i32 103 ) ; <i8*> [#uses=1]
+ ret i8* %tmp3
+
+; CHECK-LABEL: @test1(
+; CHECK: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str, i32 0, i32 3)
+}
+
+declare i8* @strchr(i8*, i32)
+
+define i8* @test2() {
+ %tmp3 = tail call i8* @strchr( i8* getelementptr ([8 x i8], [8 x i8]* @str1, i32 0, i32 2), i32 0 ) ; <i8*> [#uses=1]
+ ret i8* %tmp3
+
+; CHECK-LABEL: @test2(
+; CHECK: ret i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str1, i32 0, i32 7)
+}
+
+define i8* @test3() {
+entry:
+ %tmp3 = tail call i8* @strchr( i8* getelementptr ([5 x i8], [5 x i8]* @str2, i32 0, i32 1), i32 80 ) ; <i8*> [#uses=1]
+ ret i8* %tmp3
+
+; CHECK-LABEL: @test3(
+; CHECK: ret i8* null
+}
+
+@_2E_str = external constant [5 x i8] ; <[5 x i8]*> [#uses=1]
+
+declare i32 @memcmp(i8*, i8*, i32) nounwind readonly
+
+define i1 @PR2341(i8** %start_addr) {
+entry:
+ %tmp4 = load i8*, i8** %start_addr, align 4 ; <i8*> [#uses=1]
+ %tmp5 = call i32 @memcmp( i8* %tmp4, i8* getelementptr ([5 x i8], [5 x i8]* @_2E_str, i32 0, i32 0), i32 4 ) nounwind readonly ; <i32> [#uses=1]
+ %tmp6 = icmp eq i32 %tmp5, 0 ; <i1> [#uses=1]
+ ret i1 %tmp6
+
+; CHECK-LABEL: @PR2341(
+; CHECK: i32
+}
+
+define i32 @PR4284() nounwind {
+entry:
+ %c0 = alloca i8, align 1 ; <i8*> [#uses=2]
+ %c2 = alloca i8, align 1 ; <i8*> [#uses=2]
+ store i8 64, i8* %c0
+ store i8 -127, i8* %c2
+ %call = call i32 @memcmp(i8* %c0, i8* %c2, i32 1) ; <i32> [#uses=1]
+ ret i32 %call
+
+; CHECK-LABEL: @PR4284(
+; CHECK: ret i32 -65
+}
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64, %struct.pthread_mutex*, %struct.pthread*, i32, i32, %union.anon }
+%struct.__sbuf = type { i8*, i32, [4 x i8] }
+%struct.pthread = type opaque
+%struct.pthread_mutex = type opaque
+%union.anon = type { i64, [120 x i8] }
+@.str13 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
+@.str14 = external constant [2 x i8] ; <[2 x i8]*> [#uses=1]
+
+define i32 @PR4641(i32 %argc, i8** %argv) nounwind {
+entry:
+ call void @exit(i32 0) nounwind
+ %cond392 = select i1 undef, i8* getelementptr ([2 x i8], [2 x i8]* @.str13, i32 0, i32 0), i8* getelementptr ([2 x i8], [2 x i8]* @.str14, i32 0, i32 0) ; <i8*> [#uses=1]
+ %call393 = call %struct.__sFILE* @fopen(i8* undef, i8* %cond392) nounwind ; <%struct.__sFILE*> [#uses=0]
+ unreachable
+}
+
+declare %struct.__sFILE* @fopen(i8*, i8*)
+
+declare void @exit(i32)
+
+define i32 @PR4645() {
+entry:
+ br label %if.then
+
+lor.lhs.false: ; preds = %while.body
+ br i1 undef, label %if.then, label %for.cond
+
+if.then: ; preds = %lor.lhs.false, %while.body
+ call void @exit(i32 1)
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %if.then, %lor.lhs.false
+ %j.0 = phi i32 [ %inc47, %for.end ], [ 0, %if.then ], [ 0, %lor.lhs.false ] ; <i32> [#uses=1]
+ unreachable
+
+for.end: ; preds = %for.cond20
+ %inc47 = add i32 %j.0, 1 ; <i32> [#uses=1]
+ br label %for.cond
+}
+
+@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
+@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1]
+@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1]
+
+define i32 @MemCpy() {
+ %h_p = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0
+ %hel_p = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+ %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
+ %target = alloca [1024 x i8]
+ %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %target_p, i8* align 2 %h_p, i32 2, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %target_p, i8* align 4 %hel_p, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %target_p, i8* align 8 %hello_u_p, i32 8, i1 false)
+ ret i32 0
+
+; CHECK-LABEL: @MemCpy(
+; CHECK-NOT: llvm.memcpy
+; CHECK: ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+declare i32 @strcmp(i8*, i8*) #0
+
+define void @test9(i8* %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NOT: strcmp
+ %y = call i32 @strcmp(i8* %x, i8* %x) #1
+ ret void
+}
+
+; PR30484 - https://llvm.org/bugs/show_bug.cgi?id=30484
+; These aren't the library functions you're looking for...
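+; (The prototypes below take an i8 argument instead of the int/i32 the C
+; library uses, so the calls must be left untouched, as the checks confirm.)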
+
+declare i32 @isdigit(i8)
+declare i32 @isascii(i8)
+declare i32 @toascii(i8)
+
+define i32 @fake_isdigit(i8 %x) {
+; CHECK-LABEL: @fake_isdigit(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @isdigit(i8 %x)
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = call i32 @isdigit(i8 %x)
+ ret i32 %y
+}
+
+define i32 @fake_isascii(i8 %x) {
+; CHECK-LABEL: @fake_isascii(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @isascii(i8 %x)
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = call i32 @isascii(i8 %x)
+ ret i32 %y
+}
+
+define i32 @fake_toascii(i8 %x) {
+; CHECK-LABEL: @fake_toascii(
+; CHECK-NEXT: [[Y:%.*]] = call i32 @toascii(i8 %x)
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = call i32 @toascii(i8 %x)
+ ret i32 %y
+}
+
+
+attributes #0 = { nobuiltin }
+attributes #1 = { builtin }
diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll
new file mode 100644
index 00000000000..10342c50096
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sincospi.ll
@@ -0,0 +1,101 @@
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.9 | FileCheck %s --check-prefix=CHECK-FLOAT-IN-VEC
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+; RUN: opt -instcombine -S < %s -mtriple=x86_64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-NO-SINCOS
+
+
+attributes #0 = { readnone nounwind }
+
+declare float @__sinpif(float %x) #0
+declare float @__cospif(float %x) #0
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0
+
+@var32 = global float 0.0
+@var64 = global double 0.0
+
+define float @test_instbased_f32() {
+ %val = load float, float* @var32
+ %sin = call float @__sinpif(float %val) #0
+ %cos = call float @__cospif(float %val) #0
+ %res = fadd float %sin, %cos
+ ret float %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float, float* @var32
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load float, float* @var32
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float [[VAL]])
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define float @test_constant_f32() {
+ %sin = call float @__sinpif(float 1.0) #0
+ %cos = call float @__cospif(float 1.0) #0
+ %res = fadd float %sin, %cos
+ ret float %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
+; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float 1.000000e+00)
+; CHECK: extractvalue { float, float } [[SINCOS]], 0
+; CHECK: extractvalue { float, float } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call float @__sinpif
+; CHECK-NO-SINCOS: call float @__cospif
+}
+
+define double @test_instbased_f64() {
+ %val = load double, double* @var64
+ %sin = call double @__sinpi(double %val) #0
+ %cos = call double @__cospi(double %val) #0
+ %res = fadd double %sin, %cos
+ ret double %res
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double, double* @var64
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[VAL:%[a-z0-9]+]] = load double, double* @var64
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
+
+define double @test_constant_f64() {
+ %sin = call double @__sinpi(double 1.0) #0
+ %cos = call double @__cospi(double 1.0) #0
+ %res = fadd double %sin, %cos
+ ret double %res
+; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
+; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
+; CHECK: extractvalue { double, double } [[SINCOS]], 0
+; CHECK: extractvalue { double, double } [[SINCOS]], 1
+
+; CHECK-NO-SINCOS: call double @__sinpi
+; CHECK-NO-SINCOS: call double @__cospi
+}
+
+define double @test_fptr(double (double)* %fptr, double %p1) {
+ %sin = call double @__sinpi(double %p1) #0
+ %cos = call double %fptr(double %p1)
+ %res = fadd double %sin, %cos
+ ret double %res
+; CHECK-LABEL: @test_fptr
+; CHECK: __sinpi
+}
diff --git a/llvm/test/Transforms/InstCombine/sink-alloca.ll b/llvm/test/Transforms/InstCombine/sink-alloca.ll
new file mode 100644
index 00000000000..f2de74ff533
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-alloca.ll
@@ -0,0 +1,52 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-unknown-linux-gnu"
+
+; Check that instcombine doesn't sink dynamic allocas across llvm.stacksave.
+
+; Helper to generate branch conditions.
+declare i1 @cond()
+
+declare i32* @use_and_return(i32*)
+
+declare i8* @llvm.stacksave() #0
+
+declare void @llvm.stackrestore(i8*) #0
+
+define void @foo(i32 %x) {
+entry:
+ %c1 = call i1 @cond()
+ br i1 %c1, label %ret, label %nonentry
+
+nonentry: ; preds = %entry
+ %argmem = alloca i32, i32 %x, align 4
+ %sp = call i8* @llvm.stacksave()
+ %c2 = call i1 @cond()
+ br i1 %c2, label %ret, label %sinktarget
+
+sinktarget: ; preds = %nonentry
+ ; Arrange for there to be a single use of %argmem by returning it.
+ %p = call i32* @use_and_return(i32* nonnull %argmem)
+ store i32 13, i32* %p, align 4
+ call void @llvm.stackrestore(i8* %sp)
+ %0 = call i32* @use_and_return(i32* %p)
+ br label %ret
+
+ret: ; preds = %sinktarget, %nonentry, %entry
+ ret void
+}
+
+; CHECK-LABEL: define void @foo(i32 %x)
+; CHECK: nonentry:
+; CHECK: %argmem = alloca i32, i32 %x
+; CHECK: %sp = call i8* @llvm.stacksave()
+; CHECK: %c2 = call i1 @cond()
+; CHECK: br i1 %c2, label %ret, label %sinktarget
+; CHECK: sinktarget:
+; CHECK: %p = call i32* @use_and_return(i32* nonnull %argmem)
+; CHECK: store i32 13, i32* %p
+; CHECK: call void @llvm.stackrestore(i8* %sp)
+; CHECK: %0 = call i32* @use_and_return(i32* %p)
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll b/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll
new file mode 100644
index 00000000000..893bf2b16f7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%struct.B = type { i64, i64 }
+
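+; Check that instcombine does not sink the load/extractelement feeding the
+; phi in %ehcleanup into the phi's incoming block %catch.dispatch: that block
+; holds a catchswitch, which must be its only non-PHI instruction, so the
+; values have to stay in %invoke.cont.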
+define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: invoke.cont:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.B* [[P:%.*]] to <2 x i64>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: invoke void @throw()
+; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK: catch.dispatch:
+; CHECK-NEXT: [[CS:%.*]] = catchswitch within none [label %invoke.cont1] unwind label [[EHCLEANUP:%.*]]
+; CHECK: invoke.cont1:
+; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS]] [i8* null, i32 64, i8* null]
+; CHECK-NEXT: invoke void @throw() [ "funclet"(token [[CATCH]]) ]
+; CHECK-NEXT: to label [[UNREACHABLE]] unwind label [[EHCLEANUP]]
+; CHECK: ehcleanup:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP2]], [[CATCH_DISPATCH]] ], [ 9, [[INVOKE_CONT1:%.*]] ]
+; CHECK-NEXT: [[CLEANUP:%.*]] = cleanuppad within none []
+; CHECK-NEXT: call void @release(i64 [[PHI]]) [ "funclet"(token [[CLEANUP]]) ]
+; CHECK-NEXT: cleanupret from [[CLEANUP]] unwind to caller
+; CHECK: unreachable:
+; CHECK-NEXT: unreachable
+;
+invoke.cont:
+ %0 = bitcast %struct.B* %p to <2 x i64>*
+ %1 = load <2 x i64>, <2 x i64>* %0, align 8
+ %2 = extractelement <2 x i64> %1, i32 0
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont
+ %cs = catchswitch within none [label %invoke.cont1] unwind label %ehcleanup
+
+invoke.cont1: ; preds = %catch.dispatch
+ %catch = catchpad within %cs [i8* null, i32 64, i8* null]
+ invoke void @throw() [ "funclet"(token %catch) ]
+ to label %unreachable unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont1, %catch.dispatch
+ %phi = phi i64 [ %2, %catch.dispatch ], [ 9, %invoke.cont1 ]
+ %cleanup = cleanuppad within none []
+ call void @release(i64 %phi) [ "funclet"(token %cleanup) ]
+ cleanupret from %cleanup unwind to caller
+
+unreachable: ; preds = %invoke.cont1, %invoke.cont
+ unreachable
+}
+
+declare i32 @__CxxFrameHandler3(...)
+declare void @throw()
+declare void @release(i64)
diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll
new file mode 100644
index 00000000000..4c057c66f7f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll
@@ -0,0 +1,79 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; This tests that the instructions in the entry block are sunk into each
+;; arm of the 'if'.
+
+define i32 @test1(i1 %C, i32 %A, i32 %B) {
+; CHECK-LABEL: @test1(
+entry:
+ %tmp.2 = sdiv i32 %A, %B ; <i32> [#uses=1]
+ %tmp.9 = add i32 %B, %A ; <i32> [#uses=1]
+ br i1 %C, label %then, label %endif
+
+then: ; preds = %entry
+ ret i32 %tmp.9
+
+endif: ; preds = %entry
+; CHECK: sdiv i32
+; CHECK-NEXT: ret i32
+ ret i32 %tmp.2
+}
+
+
+;; PHI use, sink divide before call.
+define i32 @test2(i32 %x) nounwind ssp {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: sdiv i32
+entry:
+ br label %bb
+
+bb: ; preds = %bb2, %entry
+ %x_addr.17 = phi i32 [ %x, %entry ], [ %x_addr.0, %bb2 ] ; <i32> [#uses=4]
+ %i.06 = phi i32 [ 0, %entry ], [ %4, %bb2 ] ; <i32> [#uses=1]
+ %0 = add nsw i32 %x_addr.17, 1 ; <i32> [#uses=1]
+ %1 = sdiv i32 %0, %x_addr.17 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %x_addr.17, 0 ; <i1> [#uses=1]
+ br i1 %2, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+; CHECK: bb1:
+; CHECK-NEXT: add nsw i32 %x_addr.17, 1
+; CHECK-NEXT: sdiv i32
+; CHECK-NEXT: tail call i32 @bar()
+ %3 = tail call i32 @bar() nounwind ; <i32> [#uses=0]
+ br label %bb2
+
+bb2: ; preds = %bb, %bb1
+ %x_addr.0 = phi i32 [ %1, %bb1 ], [ %x_addr.17, %bb ] ; <i32> [#uses=2]
+ %4 = add nsw i32 %i.06, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %4, 1000000 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb4, label %bb
+
+bb4: ; preds = %bb2
+ ret i32 %x_addr.0
+}
+
+declare i32 @bar()
+
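+;; Test that the sign-extension, GEP, and load in the entry block are sunk
+;; into %sw.bb, the only successor that actually uses the loaded value.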
+define i32 @test3(i32* nocapture readonly %P, i32 %i) {
+entry:
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ switch i32 %i, label %sw.epilog [
+ i32 5, label %sw.bb
+ i32 2, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry
+; CHECK-LABEL: sw.bb:
+; CHECK: %idxprom = sext i32 %i to i64
+; CHECK: %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+; CHECK: %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %i
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb
+ %sum.0 = phi i32 [ %add, %sw.bb ], [ 0, %entry ]
+ ret i32 %sum.0
+}
diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll
new file mode 100644
index 00000000000..149154723b9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sitofp.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test1(i8 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 true
+;
+ %B = sitofp i8 %A to double
+ %C = fcmp ult double %B, 128.0
+ ret i1 %C
+}
+
+define i1 @test2(i8 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 true
+;
+ %B = sitofp i8 %A to double
+ %C = fcmp ugt double %B, -128.1
+ ret i1 %C
+}
+
+define i1 @test3(i8 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i1 true
+;
+ %B = sitofp i8 %A to double
+ %C = fcmp ule double %B, 127.0
+ ret i1 %C
+}
+
+define i1 @test4(i8 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A:%.*]], 127
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = sitofp i8 %A to double
+ %C = fcmp ult double %B, 127.0
+ ret i1 %C
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = sitofp i32 %A to double
+ %C = fptosi double %B to i32
+ %D = uitofp i32 %C to double
+ %E = fptoui double %D to i32
+ ret i32 %E
+}
+
+define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[ADDCONV:%.*]] = and i32 [[A:%.*]], 39
+; CHECK-NEXT: ret i32 [[ADDCONV]]
+;
+ %B = and i32 %A, 7
+ %C = and i32 %A, 32
+ %D = sitofp i32 %B to double
+ %E = sitofp i32 %C to double
+ %F = fadd double %D, %E
+ %G = fptosi double %F to i32
+ ret i32 %G
+}
+
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = sitofp i32 %A to double
+ %C = fptoui double %B to i32
+ ret i32 %C
+}
+
+define i32 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = uitofp i32 %A to double
+ %C = fptosi double %B to i32
+ ret i32 %C
+}
+
+define i32 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sitofp i8 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+define i32 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[C:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sitofp i8 %A to float
+ %C = fptosi float %B to i32
+ ret i32 %C
+}
+
+; If the input value is outside of the range of the output cast, it's
+; undefined behavior, so we can assume it fits.
+
+define i8 @test11(i32 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[C:%.*]] = trunc i32 [[A:%.*]] to i8
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %B = sitofp i32 %A to float
+ %C = fptosi float %B to i8
+ ret i8 %C
+}
+
+; If the input value is negative, it'll be outside the range of the
+; output cast, and thus undefined behavior.
+
+define i32 @test12(i8 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[C:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sitofp i8 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can't fold because the 25-bit input doesn't fit in the mantissa.
+
+define i32 @test13(i25 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[B:%.*]] = uitofp i25 [[A:%.*]] to float
+; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = uitofp i25 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; But this one can.
+
+define i32 @test14(i24 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[C:%.*]] = zext i24 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = uitofp i24 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; And this one can too.
+
+define i24 @test15(i32 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[C:%.*]] = trunc i32 [[A:%.*]] to i24
+; CHECK-NEXT: ret i24 [[C]]
+;
+ %B = uitofp i32 %A to float
+ %C = fptoui float %B to i24
+ ret i24 %C
+}
+
+; This can fold because the 25-bit input is signed and we discard the sign bit.
+
+define i32 @test16(i25 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[C:%.*]] = zext i25 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sitofp i25 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can't fold because the 26-bit input won't fit in the mantissa
+; even after discarding the sign bit.
+
+define i32 @test17(i26 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[B:%.*]] = sitofp i26 [[A:%.*]] to float
+; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i32
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sitofp i26 %A to float
+ %C = fptoui float %B to i32
+ ret i32 %C
+}
+
+; This can fold because the 54-bit output is signed and we discard the sign bit.
+
+define i54 @test18(i64 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[C:%.*]] = trunc i64 [[A:%.*]] to i54
+; CHECK-NEXT: ret i54 [[C]]
+;
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i54
+ ret i54 %C
+}
+
+; This can't fold because the 55-bit output won't fit in the mantissa
+; even after discarding the sign bit.
+
+define i55 @test19(i64 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[B:%.*]] = sitofp i64 [[A:%.*]] to double
+; CHECK-NEXT: [[C:%.*]] = fptosi double [[B]] to i55
+; CHECK-NEXT: ret i55 [[C]]
+;
+ %B = sitofp i64 %A to double
+ %C = fptosi double %B to i55
+ ret i55 %C
+}
+
diff --git a/llvm/test/Transforms/InstCombine/smax-icmp.ll b/llvm/test/Transforms/InstCombine/smax-icmp.ll
new file mode 100644
index 00000000000..e64626fc665
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/smax-icmp.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; If we have an smax feeding a signed or equality icmp that shares an
+; operand with the smax, the compare should always be folded.
+; Test all 4 foldable predicates (eq,ne,sgt,sle) * 4 commutation
+; possibilities for each predicate. Note that folds to true/false
+; (predicate = sge/slt) or folds to an existing instruction should be
+; handled by InstSimplify.
+
+; smax(X, Y) == X --> X >= Y
+
+define i1 @eq_smax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_smax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @eq_smax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_smax2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @eq_smax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_smax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @eq_smax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_smax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smax(X, Y) <= X --> X >= Y
+
+define i1 @sle_smax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @sle_smax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sle i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @sle_smax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @sle_smax2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sle i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @sle_smax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @sle_smax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @sle_smax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @sle_smax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smax(X, Y) != X --> X < Y
+
+define i1 @ne_smax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_smax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ne_smax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_smax2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @ne_smax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_smax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ne_smax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_smax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smax(X, Y) > X --> X < Y
+
+define i1 @sgt_smax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @sgt_smax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sgt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @sgt_smax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @sgt_smax2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sgt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @sgt_smax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @sgt_smax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp slt i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @sgt_smax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @sgt_smax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp sgt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp slt i32 %x, %sel
+ ret i1 %cmp2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/smin-icmp.ll b/llvm/test/Transforms/InstCombine/smin-icmp.ll
new file mode 100644
index 00000000000..b3e375fdcdd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/smin-icmp.ll
@@ -0,0 +1,333 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; If we have an smin feeding a signed or equality icmp that shares an
+; operand with the smin, the compare should always be folded.
+; Test all 6 foldable predicates (eq,ne,sge,sgt,sle,slt) * 4 commutation
+; possibilities for each predicate. Note that folds to true/false or
+; folds to an existing instruction may be handled by InstSimplify.
+
+; smin(X, Y) == X --> X <= Y
+
+define i1 @eq_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_smin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @eq_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_smin2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @eq_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_smin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @eq_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_smin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smin(X, Y) >= X --> X <= Y
+
+define i1 @sge_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @sge_smin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sge i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sge_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @sge_smin2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sge i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @sge_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @sge_smin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sle i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sge_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @sge_smin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sle i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smin(X, Y) != X --> X > Y
+
+define i1 @ne_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_smin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ne_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_smin2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @ne_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_smin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ne_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_smin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smin(X, Y) < X --> X > Y
+
+define i1 @slt_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @slt_smin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp slt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @slt_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @slt_smin2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp slt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @slt_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @slt_smin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sgt i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @slt_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @slt_smin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sgt i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smin(X, Y) <= X --> true
+
+define i1 @sle_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @sle_smin1(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sle i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sle_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @sle_smin2(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sle i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @sle_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @sle_smin3(
+; CHECK-NEXT: ret i1 true
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sle_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @sle_smin4(
+; CHECK-NEXT: ret i1 true
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; smin(X, Y) > X --> false
+
+define i1 @sgt_smin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @sgt_smin1(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp sgt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sgt_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @sgt_smin2(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp sgt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @sgt_smin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @sgt_smin3(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp slt i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @sgt_smin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @sgt_smin4(
+; CHECK-NEXT: ret i1 false
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp slt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp slt i32 %x, %sel
+ ret i1 %cmp2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/snprintf.ll b/llvm/test/Transforms/InstCombine/snprintf.ll
new file mode 100644
index 00000000000..f323bf9bf57
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/snprintf.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"str\00", align 1
+@.str.1 = private unnamed_addr constant [3 x i8] c"%%\00", align 1
+@.str.2 = private unnamed_addr constant [3 x i8] c"%c\00", align 1
+@.str.3 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+declare i32 @snprintf(i8*, i64, i8*, ...) #1
+
+define void @test_not_const_fmt(i8* %buf, i8* %fmt) #0 {
+; CHECK-LABEL: @test_not_const_fmt(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 32, i8* [[FMT:%.*]])
+; CHECK-NEXT: ret void
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* %fmt) #2
+ ret void
+}
+
+define void @test_not_const_fmt_zero_size_return_value(i8* %buf, i8* %fmt) #0 {
+; CHECK-LABEL: @test_not_const_fmt_zero_size_return_value(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 0, i8* [[FMT:%.*]])
+; CHECK-NEXT: ret void
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 0, i8* %fmt) #2
+ ret void
+}
+
+
+define void @test_not_const_size(i8* %buf, i64 %size) #0 {
+; CHECK-LABEL: @test_not_const_size(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 [[SIZE:%.*]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0))
+; CHECK-NEXT: ret void
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 %size, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret void
+}
+
+
+define i32 @test_return_value(i8* %buf) #0 {
+; CHECK-LABEL: @test_return_value(
+; CHECK-NEXT: ret i32 3
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret i32 %call
+}
+
+define void @test_percentage(i8* %buf) #0 {
+; CHECK-LABEL: @test_percentage(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: ret void
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0)) #2
+ ret void
+}
+
+define i32 @test_null_buf_return_value() #0 {
+; CHECK-LABEL: @test_null_buf_return_value(
+; CHECK-NEXT: ret i32 3
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* null, i64 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret i32 %call
+}
+
+define i32 @test_percentage_return_value() #0 {
+; CHECK-LABEL: @test_percentage_return_value(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* null, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* null, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0)) #3
+ ret i32 %call
+}
+
+
+define void @test_correct_copy(i8* %buf) #0 {
+; CHECK-LABEL: @test_correct_copy(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUF:%.*]] to i32*
+; CHECK-NEXT: store i32 7500915, i32* [[TMP1]], align 1
+; CHECK-NEXT: ret void
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret void
+}
+
+define i32 @test_char_zero_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_char_zero_size(
+; CHECK-NEXT: ret i32 1
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65) #2
+ ret i32 %call
+}
+
+define i32 @test_char_wrong_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_char_wrong_size(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65) #2
+ ret i32 %call
+}
+
+define i32 @test_char_ok_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_char_ok_size(
+; CHECK-NEXT: store i8 65, i8* [[BUF:%.*]], align 1
+; CHECK-NEXT: [[NUL:%.*]] = getelementptr i8, i8* [[BUF]], i64 1
+; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
+; CHECK-NEXT: ret i32 1
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.2, i64 0, i64 0), i32 65) #2
+ ret i32 %call
+}
+
+define i32 @test_str_zero_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_str_zero_size(
+; CHECK-NEXT: ret i32 3
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret i32 %call
+}
+
+define i32 @test_str_wrong_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_str_wrong_size(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[BUF:%.*]], i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0))
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret i32 %call
+}
+
+define i32 @test_str_ok_size(i8* %buf) #0 {
+; CHECK-LABEL: @test_str_ok_size(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUF:%.*]] to i32*
+; CHECK-NEXT: store i32 7500915, i32* [[TMP1]], align 1
+; CHECK-NEXT: ret i32 3
+;
+ %call = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %buf, i64 32, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0)) #2
+ ret i32 %call
+}
diff --git a/llvm/test/Transforms/InstCombine/sprintf-1.ll b/llvm/test/Transforms/InstCombine/sprintf-1.ll
new file mode 100644
index 00000000000..1fbdc43bd56
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sprintf-1.ll
@@ -0,0 +1,100 @@
+; Test that the sprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@h = constant [2 x i8] c"h\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+; Check sprintf(dst, fmt) -> llvm.memcpy(dst, fmt, strlen(fmt) + 1, 1).
+
+define void @test_simplify1(i8* %dst) {
+; CHECK-LABEL: @test_simplify1(
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i1 false)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(i8* %dst) {
+; CHECK-LABEL: @test_simplify2(
+ %fmt = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(i8* %dst) {
+; CHECK-LABEL: @test_simplify3(
+ %fmt = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%c", chr) -> *(i8*)dst = chr; *((i8*)dst + 1) = 0.
+
+define void @test_simplify4(i8* %dst) {
+; CHECK-LABEL: @test_simplify4(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8 104)
+; CHECK-NEXT: store i8 104, i8* %dst, align 1
+; CHECK-NEXT: [[NUL:%[a-z0-9]+]] = getelementptr i8, i8* %dst, i32 1
+; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%s", str) -> llvm.memcpy(dest, str, strlen(str) + 1, 1).
+
+define void @test_simplify5(i8* %dst, i8* %str) {
+; CHECK-LABEL: @test_simplify5(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_s, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8* %str)
+; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
+; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %str, i32 [[LENINC]], i1 false)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, format, ...) -> siprintf(dst, format, ...) when there are no floating-point arguments.
+
+define void @test_simplify6(i8* %dst) {
+; CHECK-IPRINTF-LABEL: @test_simplify6(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...) @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify1(i8* %dst) {
+; CHECK-IPRINTF-LABEL: @test_no_simplify1(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
+; CHECK-LABEL: @test_no_simplify2(
+ call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/sprintf-void.ll b/llvm/test/Transforms/InstCombine/sprintf-void.ll
new file mode 100644
index 00000000000..e84103be788
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sprintf-void.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+
+declare void @sprintf(i8*, i8*, ...)
+
+; Check that a sprintf call that would otherwise be simplified, but whose
+; return type has been optimized away (declared void), doesn't crash the optimizer.
+
+define void @test_simplify1(i8* %dst) {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: call void (i8*, i8*, ...) @sprintf(i8* [[DST:%.*]], i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0))
+; CHECK-NEXT: ret void
+;
+ %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+ call void (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/sqrt-nofast.ll b/llvm/test/Transforms/InstCombine/sqrt-nofast.ll
new file mode 100644
index 00000000000..0d1dfc1542a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sqrt-nofast.ll
@@ -0,0 +1,25 @@
+; Check that we skip transformations if the attribute unsafe-fp-math
+; is not set.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mysqrt(float %x, float %y) #0 {
+entry:
+ %x.addr = alloca float, align 4
+ %y.addr = alloca float, align 4
+ store float %x, float* %x.addr, align 4
+ store float %y, float* %y.addr, align 4
+ %0 = load float, float* %x.addr, align 4
+ %1 = load float, float* %x.addr, align 4
+ %mul = fmul fast float %0, %1
+ %2 = call float @llvm.sqrt.f32(float %mul)
+ ret float %2
+}
+
+declare float @llvm.sqrt.f32(float) #1
+
+; CHECK: define float @mysqrt(float %x, float %y) {
+; CHECK: entry:
+; CHECK: %mul = fmul fast float %x, %x
+; CHECK: %0 = call float @llvm.sqrt.f32(float %mul)
+; CHECK: ret float %0
+; CHECK: }
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
new file mode 100644
index 00000000000..bf44e4f841c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define float @test1(float %x) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: @test1(
+; CHECK-NOT: fpext
+; CHECK-NOT: sqrt(
+; CHECK: sqrtf(
+; CHECK-NOT: fptrunc
+ %conv = fpext float %x to double ; <double> [#uses=1]
+ %call = tail call double @sqrt(double %conv) readnone nounwind ; <double> [#uses=1]
+ %conv1 = fptrunc double %call to float ; <float> [#uses=1]
+; CHECK: ret float
+ ret float %conv1
+}
+
+; PR8096
+define float @test2(float %x) nounwind readnone ssp {
+entry:
+; CHECK-LABEL: @test2(
+; CHECK-NOT: fpext
+; CHECK-NOT: sqrt(
+; CHECK: sqrtf(
+; CHECK-NOT: fptrunc
+ %conv = fpext float %x to double ; <double> [#uses=1]
+ %call = tail call double @sqrt(double %conv) nounwind ; <double> [#uses=1]
+ %conv1 = fptrunc double %call to float ; <float> [#uses=1]
+; CHECK: ret float
+ ret float %conv1
+}
+
+; rdar://9763193
+; Can't fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) since there is another
+; use of the sqrt result.
+define float @test3(float* %v) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: @test3(
+; CHECK: sqrt(
+; CHECK-NOT: sqrtf(
+; CHECK: fptrunc
+ %arrayidx13 = getelementptr inbounds float, float* %v, i64 2
+ %tmp14 = load float, float* %arrayidx13
+ %mul18 = fmul float %tmp14, %tmp14
+ %add19 = fadd float undef, %mul18
+ %conv = fpext float %add19 to double
+ %call34 = call double @sqrt(double %conv) readnone
+ %call36 = call i32 (double) @foo(double %call34) nounwind
+ %conv38 = fptrunc double %call34 to float
+ ret float %conv38
+}
+
+declare i32 @foo(double)
+
+declare double @sqrt(double) readnone
diff --git a/llvm/test/Transforms/InstCombine/srem-canonicalize.ll b/llvm/test/Transforms/InstCombine/srem-canonicalize.ll
new file mode 100644
index 00000000000..dc6b15c287c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/srem-canonicalize.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test_srem_canonicalize_op0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_srem_canonicalize_op0(
+; CHECK-NEXT: [[TMP1:%.*]] = srem i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SREM:%.*]] = sub nsw i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[SREM]]
+;
+ %neg = sub nsw i32 0, %x
+ %srem = srem i32 %neg, %y
+ ret i32 %srem
+}
+
+; (X srem -Y) is not equal to -(X srem Y), so don't canonicalize.
+define i32 @test_srem_canonicalize_op1(i32 %x, i32 %z) {
+; CHECK-LABEL: @test_srem_canonicalize_op1(
+; CHECK-NEXT: [[Y:%.*]] = mul i32 [[Z:%.*]], 3
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[Y]], [[NEG]]
+; CHECK-NEXT: ret i32 [[SREM]]
+;
+ %y = mul i32 %z, 3
+ %neg = sub nsw i32 0, %x
+ %srem = srem i32 %y, %neg
+ ret i32 %srem
+}
+
+define i32 @test_srem_canonicalize_nonsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_srem_canonicalize_nonsw(
+; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SREM]]
+;
+ %neg = sub i32 0, %x
+ %srem = srem i32 %neg, %y
+ ret i32 %srem
+}
+
+define <2 x i32> @test_srem_canonicalize_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test_srem_canonicalize_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SREM:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[SREM]]
+;
+ %neg = sub nsw <2 x i32> <i32 0, i32 0>, %x
+ %srem = srem <2 x i32> %neg, %y
+ ret <2 x i32> %srem
+}
+
+define i32 @test_srem_canonicalize_multiple_uses(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_srem_canonicalize_multiple_uses(
+; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[SREM2:%.*]] = srem i32 [[SREM]], [[NEG]]
+; CHECK-NEXT: ret i32 [[SREM2]]
+;
+ %neg = sub nsw i32 0, %x
+ %srem = srem i32 %neg, %y
+ %srem2 = srem i32 %srem, %neg
+ ret i32 %srem2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/srem-simplify-bug.ll b/llvm/test/Transforms/InstCombine/srem-simplify-bug.ll
new file mode 100644
index 00000000000..3458714b8da
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/srem-simplify-bug.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | grep "ret i1 false"
+; PR2276
+
+define i1 @f(i32 %x) {
+ %A = or i32 %x, 1
+ %B = srem i32 %A, 1
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
+}
diff --git a/llvm/test/Transforms/InstCombine/srem1.ll b/llvm/test/Transforms/InstCombine/srem1.ll
new file mode 100644
index 00000000000..31452d8f208
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/srem1.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine
+; PR2670
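+; There is no FileCheck here; this only verifies that instcombine processes
+; the function without crashing.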
+
+@g_127 = external global i32 ; <i32*> [#uses=1]
+
+define i32 @func_56(i32 %p_58, i32 %p_59, i32 %p_61, i16 signext %p_62) nounwind {
+entry:
+ %call = call i32 (...) @rshift_s_s( i32 %p_61, i32 1 ) ; <i32> [#uses=1]
+ %conv = sext i32 %call to i64 ; <i64> [#uses=1]
+ %or = or i64 -1734012817166602727, %conv ; <i64> [#uses=1]
+ %rem = srem i64 %or, 1 ; <i64> [#uses=1]
+ %cmp = icmp eq i64 %rem, 1 ; <i1> [#uses=1]
+ %cmp.ext = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ store i32 %cmp.ext, i32* @g_127
+ ret i32 undef
+}
+
+declare i32 @rshift_s_s(...)
diff --git a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
new file mode 100644
index 00000000000..35528925f23
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
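+; An ssub.with.overflow with a constant RHS is canonicalized to
+; sadd.with.overflow of the negated constant (folding in a preceding
+; nsw add/sub of a constant where possible), except when the constant
+; cannot be safely negated, e.g. i8 -128 or a vector with undef lanes.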
+declare { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32>, <2 x i32>)
+
+declare { <2 x i8>, <2 x i1> } @llvm.ssub.with.overflow.v2i8(<2 x i8>, <2 x i8>)
+
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
+
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8)
+
+define { i32, i1 } @simple_fold(i32 %x) {
+; CHECK-LABEL: @simple_fold(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -20)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = sub nsw i32 %x, 7
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 13)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @fold_mixed_signs(i32 %x) {
+; CHECK-LABEL: @fold_mixed_signs(
+; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], -6
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[B]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = sub nsw i32 %x, 13
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 -7)
+ ret { i32, i1 } %b
+}
+
+define { i8, i1 } @fold_on_constant_sub_no_overflow(i8 %x) {
+; CHECK-LABEL: @fold_on_constant_sub_no_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 -128)
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %a = sub nsw i8 %x, 100
+ %b = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 28)
+ ret { i8, i1 } %b
+}
+
+define { i8, i1 } @no_fold_on_constant_sub_overflow(i8 %x) {
+; CHECK-LABEL: @no_fold_on_constant_sub_overflow(
+; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], -100
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A]], i8 -29)
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %a = sub nsw i8 %x, 100
+ %b = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 29)
+ ret { i8, i1 } %b
+}
+
+define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @fold_simple_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -42, i32 -42>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = sub nsw <2 x i32> %x, <i32 12, i32 12>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
+; CHECK-LABEL: @no_fold_splat_undef_constant(
+; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X:%.*]], <i32 -12, i32 undef>
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 -30, i32 -30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = sub nsw <2 x i32> %x, <i32 12, i32 undef>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @no_fold_splat_not_constant(
+; CHECK-NEXT: [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 -30, i32 -30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = sub nsw <2 x i32> %x, %y
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { i32, i1 } @fold_nuwnsw(i32 %x) {
+; CHECK-LABEL: @fold_nuwnsw(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -42)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = sub nuw nsw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_nuw(i32 %x) {
+; CHECK-LABEL: @no_fold_nuw(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -12
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A]], i32 -30)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = sub nuw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_wrapped_sub(i32 %x) {
+; CHECK-LABEL: @no_fold_wrapped_sub(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -12
+; CHECK-NEXT: [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 30, i32 [[A]])
+; CHECK-NEXT: ret { i32, i1 } [[B]]
+;
+ %a = sub i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 30, i32 %a)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @fold_add_simple(i32 %x) {
+; CHECK-LABEL: @fold_add_simple(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -42)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nsw i32 %x, -12
+ %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { <2 x i32>, <2 x i1> } @keep_ssubo_undef(<2 x i32> %x) {
+; CHECK-LABEL: @keep_ssubo_undef(
+; CHECK-NEXT: [[A:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 30, i32 undef>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[A]]
+;
+ %a = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %x, <2 x i32> <i32 30, i32 undef>)
+ ret { <2 x i32>, <2 x i1> } %a
+}
+
+define { <2 x i32>, <2 x i1> } @keep_ssubo_non_splat(<2 x i32> %x) {
+; CHECK-LABEL: @keep_ssubo_non_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -30, i32 -31>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %x, <2 x i32> <i32 30, i32 31>)
+ ret { <2 x i32>, <2 x i1> } %a
+}
+
+define { <2 x i8>, <2 x i1> } @keep_ssubo_one_element_is_128(<2 x i8> %x) {
+; CHECK-LABEL: @keep_ssubo_one_element_is_128(
+; CHECK-NEXT: [[A:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.ssub.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 0, i8 -128>)
+; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[A]]
+;
+ %a = tail call { <2 x i8>, <2 x i1> } @llvm.ssub.with.overflow.v2i8(<2 x i8> %x, <2 x i8> <i8 0, i8 -128>)
+ ret { <2 x i8>, <2 x i1> } %a
+}
+
+define { i8, i1 } @keep_ssubo_128(i8 %x) {
+; CHECK-LABEL: @keep_ssubo_128(
+; CHECK-NEXT: [[A:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 -128)
+; CHECK-NEXT: ret { i8, i1 } [[A]]
+;
+ %a = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 -128)
+ ret { i8, i1 } %a
+}
diff --git a/llvm/test/Transforms/InstCombine/stack-overalign.ll b/llvm/test/Transforms/InstCombine/stack-overalign.ll
new file mode 100644
index 00000000000..65d004008fa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stack-overalign.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | grep "align 32" | count 2
+
+; It's tempting to have an instcombine that raises the alignment of a
+; memcpy's source pointer to match the alignment of the destination;
+; however, there are pitfalls. If the source is an alloca, aligning it
+; beyond the target's stack-pointer alignment requires dynamic stack
+; realignment, which can force functions that would not otherwise need
+; a frame pointer to use one.
+;
+; Abstaining from this transform is not the only way to approach this
+; issue. Some late phase could be smart enough to reduce alloca
+; alignments when they are greater than they need to be. Or, codegen
+; could do dynamic alignment for just the one alloca, and leave the
+; main stack pointer at its standard alignment.
+;
+
+
+@dst = global [1024 x i8] zeroinitializer, align 32
+
+define void @foo() nounwind {
+entry:
+ %src = alloca [1024 x i8], align 1
+ %src1 = getelementptr [1024 x i8], [1024 x i8]* %src, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 32 getelementptr inbounds ([1024 x i8], [1024 x i8]* @dst, i32 0, i32 0), i8* align 32 %src1, i32 1024, i1 false)
+ call void @frob(i8* %src1) nounwind
+ ret void
+}
+
+declare void @frob(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll b/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll
new file mode 100644
index 00000000000..3c31c4c78a9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; dbg.value intrinsics should not affect peephole combining of stacksave/stackrestore.
+; PR37713
+; RUN: opt -instcombine %s -S | FileCheck %s
+
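+; A stacksave whose only non-debug use is an immediately following
+; stackrestore is a no-op pair; both calls should be removed even when
+; dbg.value intrinsics sit between them, and the variable-sized alloca
+; below should survive with its debug info intact.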
+declare i8* @llvm.stacksave() #0
+declare void @llvm.stackrestore(i8*) #0
+
+define i32* @test1(i32 %P) !dbg !6 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P:%.*]] to i64, !dbg !12
+; CHECK-NEXT: [[A:%.*]] = alloca i32, i64 [[TMP1]], align 4, !dbg !12
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[A]], metadata !11, metadata !DIExpression()), !dbg !12
+; CHECK-NEXT: ret i32* [[A]], !dbg !13
+;
+ %tmp = call i8* @llvm.stacksave(), !dbg !12
+ call void @llvm.dbg.value(metadata i8* %tmp, metadata !9, metadata !DIExpression()), !dbg !12
+ call void @llvm.stackrestore(i8* %tmp), !dbg !13
+ %A = alloca i32, i32 %P, !dbg !14
+ call void @llvm.dbg.value(metadata i32* %A, metadata !11, metadata !DIExpression()), !dbg !14
+ ret i32* %A, !dbg !15
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!3, !4}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "patatino.ll", directory: "/")
+!2 = !{}
+!3 = !{i32 4}
+!4 = !{i32 2}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "test1", linkageName: "test1", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
+!7 = !DISubroutineType(types: !2)
+!8 = !{!9, !11}
+!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
+!11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 3, type: !10)
+!12 = !DILocation(line: 1, column: 1, scope: !6)
+!13 = !DILocation(line: 2, column: 1, scope: !6)
+!14 = !DILocation(line: 3, column: 1, scope: !6)
+!15 = !DILocation(line: 4, column: 1, scope: !6)
diff --git a/llvm/test/Transforms/InstCombine/stacksaverestore.ll b/llvm/test/Transforms/InstCombine/stacksaverestore.ll
new file mode 100644
index 00000000000..9eb0efb1911
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stacksaverestore.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@glob = global i32 0
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+;; Test that llvm.stackrestore is removed when possible.
+define i32* @test1(i32 %P) {
+ %tmp = call i8* @llvm.stacksave( )
+ call void @llvm.stackrestore( i8* %tmp ) ;; not restoring anything
+ %A = alloca i32, i32 %P
+ ret i32* %A
+}
+
+; CHECK-LABEL: define i32* @test1(
+; CHECK-NOT: call void @llvm.stackrestore
+; CHECK: ret i32*
+
+define void @test2(i8* %X) {
+ call void @llvm.stackrestore( i8* %X ) ;; no allocas before return.
+ ret void
+}
+
+; CHECK-LABEL: define void @test2(
+; CHECK-NOT: call void @llvm.stackrestore
+; CHECK: ret void
+
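+; In the loop below, the three inner stacksave/stackrestore pairs are
+; redundant: only the outermost save (%tmp) and a single restore before
+; the back-edge are expected to remain (see the CHECK lines after the
+; function).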
+define void @foo(i32 %size) nounwind {
+entry:
+ %tmp118124 = icmp sgt i32 %size, 0 ; <i1> [#uses=1]
+ br i1 %tmp118124, label %bb.preheader, label %return
+
+bb.preheader: ; preds = %entry
+ %tmp25 = add i32 %size, -1 ; <i32> [#uses=1]
+ %tmp125 = icmp slt i32 %size, 1 ; <i1> [#uses=1]
+ %smax = select i1 %tmp125, i32 1, i32 %size ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.preheader
+ %i.0.reg2mem.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %tmp = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp23 = alloca i8, i32 %size ; <i8*> [#uses=2]
+ %tmp27 = getelementptr i8, i8* %tmp23, i32 %tmp25 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp27, align 1
+ %tmp28 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp52 = alloca i8, i32 %size ; <i8*> [#uses=1]
+ %tmp53 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp77 = alloca i8, i32 %size ; <i8*> [#uses=1]
+ %tmp78 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
+ %tmp102 = alloca i8, i32 %size ; <i8*> [#uses=1]
+ call void @bar( i32 %i.0.reg2mem.0, i8* %tmp23, i8* %tmp52, i8* %tmp77, i8* %tmp102, i32 %size ) nounwind
+ call void @llvm.stackrestore( i8* %tmp78 )
+ call void @llvm.stackrestore( i8* %tmp53 )
+ call void @llvm.stackrestore( i8* %tmp28 )
+ call void @llvm.stackrestore( i8* %tmp )
+ %indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %smax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; CHECK-LABEL: define void @foo(
+; CHECK: %tmp = call i8* @llvm.stacksave()
+; CHECK: alloca i8
+; CHECK-NOT: stacksave
+; CHECK: call void @bar(
+; CHECK-NEXT: call void @llvm.stackrestore(i8* %tmp)
+; CHECK: ret void
+
+declare void @bar(i32, i8*, i8*, i8*, i8*, i32)
+
+declare void @inalloca_callee(i32* inalloca)
+
+define void @test3(i32 %c) {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [0, %entry], [%i1, %loop]
+ %save1 = call i8* @llvm.stacksave()
+ %argmem = alloca inalloca i32
+ store i32 0, i32* %argmem
+ call void @inalloca_callee(i32* inalloca %argmem)
+
+  ; This restore cannot be deleted; the restore below does not make it dead.
+ call void @llvm.stackrestore(i8* %save1)
+
+ ; FIXME: We should be able to remove this save/restore pair, but we don't.
+ %save2 = call i8* @llvm.stacksave()
+ store i32 0, i32* @glob
+ call void @llvm.stackrestore(i8* %save2)
+ %i1 = add i32 1, %i
+ %done = icmp eq i32 %i1, %c
+ br i1 %done, label %loop, label %return
+
+return:
+ ret void
+}
+
+; CHECK-LABEL: define void @test3(
+; CHECK: loop:
+; CHECK: %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK: %save1 = call i8* @llvm.stacksave()
+; CHECK: %argmem = alloca inalloca i32
+; CHECK: store i32 0, i32* %argmem
+; CHECK: call void @inalloca_callee(i32* inalloca {{.*}} %argmem)
+; CHECK: call void @llvm.stackrestore(i8* %save1)
+; CHECK: br i1 %done, label %loop, label %return
+; CHECK: ret void
diff --git a/llvm/test/Transforms/InstCombine/statepoint.ll b/llvm/test/Transforms/InstCombine/statepoint.ll
new file mode 100644
index 00000000000..54fb6a7756f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/statepoint.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These tests check the optimizations specific to
+; pointers being relocated at a statepoint.
+
+
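+; gc.relocate returns the (possibly moved) value of the relocated pointer,
+; so known facts about the source pointer carry over: a nonnull argument
+; gives a nonnull relocation, and relocating null or undef folds to a
+; constant. A pointer with no such information (test_negative) is left
+; alone.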
+declare void @func()
+
+define i1 @test_negative(i32 addrspace(1)* %p) gc "statepoint-example" {
+entry:
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %pnew, null
+ ret i1 %cmp
+; CHECK-LABEL: test_negative
+; CHECK: %pnew = call i32 addrspace(1)*
+; CHECK: ret i1 %cmp
+}
+
+define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) gc "statepoint-example" {
+entry:
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %pnew, null
+ ret i1 %cmp
+; CHECK-LABEL: test_nonnull
+; CHECK: ret i1 false
+}
+
+define i1 @test_null() gc "statepoint-example" {
+entry:
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %pnew, null
+ ret i1 %cmp
+; CHECK-LABEL: test_null
+; CHECK-NOT: %pnew
+; CHECK: ret i1 true
+}
+
+define i1 @test_undef() gc "statepoint-example" {
+entry:
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %pnew, null
+ ret i1 %cmp
+; CHECK-LABEL: test_undef
+; CHECK-NOT: %pnew
+; CHECK: ret i1 undef
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
diff --git a/llvm/test/Transforms/InstCombine/store-load-unaliased-gep.ll b/llvm/test/Transforms/InstCombine/store-load-unaliased-gep.ll
new file mode 100644
index 00000000000..cdeee317055
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/store-load-unaliased-gep.ll
@@ -0,0 +1,23 @@
+; RUN: opt -instcombine %s -S 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes=instcombine %s -S 2>&1 | FileCheck %s
+
+; Check successful store-to-load forwarding of the array length.
+; The function below should reduce to just "return length".
+; This is doable only if instcombine has access to alias analysis.
+
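+; The load through %length_gep can only be forwarded from the first store
+; if the intervening store through %value_gep is known not to alias it;
+; the two GEPs index distinct elements of the same alloca, which BasicAA
+; can prove.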
+define i32 @test1(i32 %length) {
+; CHECK-LABEL: entry:
+entry:
+ %array = alloca i32, i32 2
+ ; CHECK-NOT: %array
+
+ %length_gep = getelementptr inbounds i32, i32 * %array, i32 0
+ %value_gep = getelementptr inbounds i32, i32 * %array, i32 1
+ store i32 %length, i32 * %length_gep
+ store i32 0, i32 * %value_gep
+ %loaded_length = load i32, i32 * %length_gep
+ ; CHECK-NOT: %loaded_length = load i32
+
+ ret i32 %loaded_length
+ ; CHECK: ret i32 %length
+}
diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll
new file mode 100644
index 00000000000..c7c8374f10e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/store.ll
@@ -0,0 +1,309 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test1(i32* %P) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: store i32 123, i32* undef, align 4
+; CHECK-NEXT: store i32 undef, i32* null, align 536870912
+; CHECK-NEXT: ret void
+;
+ store i32 undef, i32* %P
+ store i32 123, i32* undef
+ store i32 124, i32* null
+ ret void
+}
+
+define void @test2(i32* %P) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret void
+;
+ %X = load i32, i32* %P
+ %Y = add i32 %X, 0
+ store i32 %Y, i32* %P
+ ret void
+}
+
+define void @store_at_gep_off_null(i64 %offset) {
+; CHECK-LABEL: @store_at_gep_off_null(
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, i32* null, i64 [[OFFSET:%.*]]
+; CHECK-NEXT: store i32 undef, i32* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %ptr = getelementptr i32, i32 *null, i64 %offset
+ store i32 24, i32* %ptr
+ ret void
+}
+
+define void @store_at_gep_off_no_null_opt(i64 %offset) #0 {
+; CHECK-LABEL: @store_at_gep_off_no_null_opt(
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr i32, i32* null, i64 [[OFFSET:%.*]]
+; CHECK-NEXT: store i32 24, i32* [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %ptr = getelementptr i32, i32 *null, i64 %offset
+ store i32 24, i32* %ptr
+ ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
+
+;; Simple sinking tests
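+; Two stores to the same pointer that reach a common successor are merged
+; into a single store (or return) of a phi of the stored values
+; (%storemerge below); when the pointer is an otherwise dead alloca, the
+; store, load, and alloca are removed entirely.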
+
+; "if then else"
+define i32 @test3(i1 %C) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: br i1 [[C:%.*]], label [[COND:%.*]], label [[COND2:%.*]]
+; CHECK: Cond:
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: Cond2:
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: Cont:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ -987654321, [[COND]] ], [ 47, [[COND2]] ]
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %A = alloca i32
+ br i1 %C, label %Cond, label %Cond2
+
+Cond:
+ store i32 -987654321, i32* %A
+ br label %Cont
+
+Cond2:
+ store i32 47, i32* %A
+ br label %Cont
+
+Cont:
+ %V = load i32, i32* %A
+ ret i32 %V
+}
+
+; "if then"
+define i32 @test4(i1 %C) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: br i1 [[C:%.*]], label [[COND:%.*]], label [[CONT:%.*]]
+; CHECK: Cond:
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: Cont:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ -987654321, [[COND]] ], [ 47, [[TMP0:%.*]] ]
+; CHECK-NEXT: ret i32 [[STOREMERGE]]
+;
+ %A = alloca i32
+ store i32 47, i32* %A
+ br i1 %C, label %Cond, label %Cont
+
+Cond:
+ store i32 -987654321, i32* %A
+ br label %Cont
+
+Cont:
+ %V = load i32, i32* %A
+ ret i32 %V
+}
+
+; "if then"
+define void @test5(i1 %C, i32* %P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: br i1 [[C:%.*]], label [[COND:%.*]], label [[CONT:%.*]]
+; CHECK: Cond:
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: Cont:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ -987654321, [[COND]] ], [ 47, [[TMP0:%.*]] ]
+; CHECK-NEXT: store i32 [[STOREMERGE]], i32* [[P:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+ store i32 47, i32* %P, align 1
+ br i1 %C, label %Cond, label %Cont
+
+Cond:
+ store i32 -987654321, i32* %P, align 1
+ br label %Cont
+
+Cont:
+ ret void
+}
+
+
+; PR14753 - merging two stores should preserve the TBAA tag.
+define void @test6(i32 %n, float* %a, i32* %gi) nounwind uwtable ssp {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: store i32 [[STOREMERGE]], i32* [[GI:%.*]], align 4, !tbaa !0
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[STOREMERGE]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !tbaa !4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GI]], align 4, !tbaa !0
+; CHECK-NEXT: [[INC]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ store i32 42, i32* %gi, align 4, !tbaa !0
+ br label %for.cond
+
+for.cond:
+ %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i32, i32* %gi, align 4, !tbaa !0
+ %cmp = icmp slt i32 %0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %0 to i64
+ %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
+ store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+ %1 = load i32, i32* %gi, align 4, !tbaa !0
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %gi, align 4, !tbaa !0
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+define void @dse1(i32* %p) {
+; CHECK-LABEL: @dse1(
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, i32* %p
+ store i32 0, i32* %p
+ ret void
+}
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race. As such, we're free to pick either store under the
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: @dse2(
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ store atomic i32 0, i32* %p unordered, align 4
+ store i32 0, i32* %p
+ ret void
+}
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: @dse3(
+; CHECK-NEXT: store atomic i32 0, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, i32* %p
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: @dse4(
+; CHECK-NEXT: store atomic i32 0, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: ret void
+;
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+; Implementation limit - we could remove the unordered store here, but
+; we currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: @dse5(
+; CHECK-NEXT: store atomic i32 0, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i32 0, i32* [[P]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: @write_back1(
+; CHECK-NEXT: ret void
+;
+ %v = load i32, i32* %p
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: @write_back2(
+; CHECK-NEXT: ret void
+;
+ %v = load atomic i32, i32* %p unordered, align 4
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: @write_back3(
+; CHECK-NEXT: ret void
+;
+ %v = load i32, i32* %p
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: @write_back4(
+; CHECK-NEXT: ret void
+;
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: @write_back5(
+; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i32 [[V]], i32* [[P]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: @write_back6(
+; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+ %v = load atomic i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: @write_back7(
+; CHECK-NEXT: [[V:%.*]] = load atomic volatile i32, i32* [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+ %v = load atomic volatile i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+@Unknown = external constant i32
+
+define void @store_to_constant() {
+; CHECK-LABEL: @store_to_constant(
+; CHECK-NEXT: store i32 0, i32* @Unknown, align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, i32* @Unknown
+ ret void
+}
+
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"float", !1}
+!4 = !{!"int", !1}
diff --git a/llvm/test/Transforms/InstCombine/storemerge-dbg.ll b/llvm/test/Transforms/InstCombine/storemerge-dbg.ll
new file mode 100644
index 00000000000..dc40dd7f787
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/storemerge-dbg.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -debugify -instcombine -S | FileCheck %s
+
+declare i32 @escape(i32)
+
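+; The two stores below are merged into a phi in %cleanup. The merged value
+; cannot faithfully take its debug location from either branch, so it is
+; expected to get an artificial line-0 location (checked via [[merge_loc]]).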
+; CHECK-LABEL: define {{.*}}@foo(
+define i32 @foo() {
+entry:
+ %baz = alloca i32
+ br i1 undef, label %lhs, label %rhs
+
+lhs:
+ store i32 1, i32* %baz
+ br label %cleanup
+
+rhs:
+ store i32 2, i32* %baz
+ br label %cleanup
+
+cleanup:
+ ; CHECK: %storemerge = phi i32 [ 1, %lhs ], [ 2, %rhs ], !dbg [[merge_loc:![0-9]+]]
+ %baz.val = load i32, i32* %baz
+ %ret.val = call i32 @escape(i32 %baz.val)
+ ret i32 %ret.val
+}
+
+; CHECK: [[merge_loc]] = !DILocation(line: 0
diff --git a/llvm/test/Transforms/InstCombine/stpcpy-1.ll b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
new file mode 100644
index 00000000000..cc828993707
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
@@ -0,0 +1,46 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
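+; With a constant source string of known length n, stpcpy(dst, src) is
+; expected to become a memcpy of n+1 bytes and return dst + n (a pointer
+; to the copied nul terminator). stpcpy(x, x) becomes x + strlen(x).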
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @stpcpy(i8*, i8*)
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NEXT: getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 5)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %dst)
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
+; CHECK-NEXT: getelementptr inbounds [32 x i8], [32 x i8]* @a, i32 0, i32 [[LEN]]
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @stpcpy
+ ret i8* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/stpcpy-2.ll b/llvm/test/Transforms/InstCombine/stpcpy-2.ll
new file mode 100644
index 00000000000..07e13a69e2d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @stpcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @stpcpy
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
new file mode 100644
index 00000000000..b2e0416ae3d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -0,0 +1,103 @@
+; Test lib call simplification of __stpcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
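+; When the object size argument is known to be large enough (at least
+; strlen(src) + 1), __stpcpy_chk is treated like plain stpcpy: it becomes
+; a memcpy and returns a pointer to the copied nul terminator. With
+; unknown sizes it is lowered to stpcpy or __memcpy_chk, and when the
+; size is provably too small the call is left untouched.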
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where slen >= strlen (src).
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
+ ret i8* %ret
+}
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret i8* %ret
+}
+
+; Check cases where there are no string constants.
+
+define i8* @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %stpcpy = call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* %stpcpy
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret i8* %ret
+}
+
+; Check case where the string length is not constant.
+
+define i8* @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false)
+; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
+ %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false, i1 false)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where the source and destination are the same.
+
+define i8* @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+
+; CHECK-NEXT: %strlen = call i32 @strlen(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0))
+; CHECK-NEXT: %1 = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 %strlen
+; CHECK-NEXT: ret i8* %1
+ %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false, i1 false)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where slen < strlen (src).
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %ret = call i8* @__stpcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0), i32 8)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret i8* %ret
+}
+
+declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-2.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-2.ll
new file mode 100644
index 00000000000..b4803f98b24
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __stpcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = getelementptr inbounds [60 x i16], [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8], [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
diff --git a/llvm/test/Transforms/InstCombine/str-int-2.ll b/llvm/test/Transforms/InstCombine/str-int-2.ll
new file mode 100644
index 00000000000..37e64a31ee8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/str-int-2.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str = private unnamed_addr constant [3 x i8] c"12\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"0\00", align 1
+@.str.2 = private unnamed_addr constant [11 x i8] c"4294967296\00", align 1
+@.str.3 = private unnamed_addr constant [24 x i8] c"10000000000000000000000\00", align 1
+@.str.4 = private unnamed_addr constant [20 x i8] c"9923372036854775807\00", align 1
+@.str.5 = private unnamed_addr constant [11 x i8] c"4994967295\00", align 1
+@.str.6 = private unnamed_addr constant [10 x i8] c"499496729\00", align 1
+@.str.7 = private unnamed_addr constant [11 x i8] c"4994967295\00", align 1
+
+declare i64 @strtol(i8*, i8**, i32)
+declare i32 @atoi(i8*)
+declare i64 @atol(i8*)
+declare i64 @atoll(i8*)
+declare i64 @strtoll(i8*, i8**, i32)
+
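+; With 64-bit result types, calls on constant strings fold to constants
+; whenever the value is representable; strtol_big_overflow stays because
+; 10000000000000000000000 does not fit in an i64, and strtol_endptr_not_null
+; stays because a non-null end pointer would have to be written.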
+define i64 @strtol_dec() #0 {
+; CHECK-LABEL: @strtol_dec(
+; CHECK-NEXT: ret i64 12
+;
+ %call = call i64 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 10) #2
+ ret i64 %call
+}
+
+define i64 @strtol_base_zero() #0 {
+; CHECK-LABEL: @strtol_base_zero(
+; CHECK-NEXT: ret i64 12
+;
+ %call = call i64 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 0) #2
+ ret i64 %call
+}
+
+define i64 @strtol_hex() #0 {
+; CHECK-LABEL: @strtol_hex(
+; CHECK-NEXT: ret i64 18
+;
+ %call = call i64 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 16) #2
+ ret i64 %call
+}
+
+define i64 @strtol_endptr_not_null() #0 {
+; CHECK-LABEL: @strtol_endptr_not_null(
+; CHECK-NEXT: [[END:%.*]] = alloca i8*, align 4
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @strtol(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i8** nonnull [[END]], i32 10)
+; CHECK-NEXT: ret i64 [[CALL]]
+;
+ %end = alloca i8*, align 4
+ %call = call i64 @strtol(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %end, i32 10) #2
+ ret i64 %call
+}
+
+define i32 @atoi_test() #0 {
+; CHECK-LABEL: @atoi_test(
+; CHECK-NEXT: ret i32 12
+;
+ %call = call i32 @atoi(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) #4
+ ret i32 %call
+}
+
+define i64 @strtol_not_const_str(i8* %s) #0 {
+; CHECK-LABEL: @strtol_not_const_str(
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @strtol(i8* nocapture [[S:%.*]], i8** null, i32 10)
+; CHECK-NEXT: ret i64 [[CALL]]
+;
+ %call = call i64 @strtol(i8* %s, i8** null, i32 10) #3
+ ret i64 %call
+}
+
+define i32 @atoi_not_const_str(i8* %s) #0 {
+; CHECK-LABEL: @atoi_not_const_str(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @atoi(i8* [[S:%.*]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @atoi(i8* %s) #4
+ ret i32 %call
+}
+
+define i64 @strtol_not_const_base(i32 %b) #0 {
+; CHECK-LABEL: @strtol_not_const_base(
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @strtol(i8* nocapture getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i8** null, i32 [[B:%.*]])
+; CHECK-NEXT: ret i64 [[CALL]]
+;
+ %call = call i64 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 %b) #2
+ ret i64 %call
+}
+
+define i64 @strtol_long_int() #0 {
+; CHECK-LABEL: @strtol_long_int(
+; CHECK-NEXT: ret i64 4294967296
+;
+ %call = call i64 @strtol(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i32 0, i32 0), i8** null, i32 10) #3
+ ret i64 %call
+}
+
+
+define i64 @strtol_big_overflow() #0 {
+; CHECK-LABEL: @strtol_big_overflow(
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @strtol(i8* nocapture getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0), i8** null, i32 10)
+; CHECK-NEXT: ret i64 [[CALL]]
+;
+ %call = call i64 @strtol(i8* nocapture getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0), i8** null, i32 10) #2
+ ret i64 %call
+}
+
+define i64 @atol_test() #0 {
+; CHECK-LABEL: @atol_test(
+; CHECK-NEXT: ret i64 499496729
+;
+ %call = call i64 @atol(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.6, i32 0, i32 0)) #4
+ ret i64 %call
+}
+
+define i64 @atoll_test() #0 {
+; CHECK-LABEL: @atoll_test(
+; CHECK-NEXT: ret i64 4994967295
+;
+ %call = call i64 @atoll(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.5, i32 0, i32 0)) #3
+ ret i64 %call
+}
+
+define i64 @strtoll_test() #0 {
+; CHECK-LABEL: @strtoll_test(
+; CHECK-NEXT: ret i64 4994967295
+;
+ %call = call i64 @strtoll(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.7, i32 0, i32 0), i8** null, i32 10) #5
+ ret i64 %call
+}
diff --git a/llvm/test/Transforms/InstCombine/str-int.ll b/llvm/test/Transforms/InstCombine/str-int.ll
new file mode 100644
index 00000000000..ac5b6ceccce
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/str-int.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str = private unnamed_addr constant [3 x i8] c"12\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"0\00", align 1
+@.str.2 = private unnamed_addr constant [11 x i8] c"4294967296\00", align 1
+@.str.3 = private unnamed_addr constant [24 x i8] c"10000000000000000000000\00", align 1
+@.str.4 = private unnamed_addr constant [20 x i8] c"9923372036854775807\00", align 1
+@.str.5 = private unnamed_addr constant [11 x i8] c"4994967295\00", align 1
+@.str.6 = private unnamed_addr constant [10 x i8] c"499496729\00", align 1
+@.str.7 = private unnamed_addr constant [11 x i8] c"4994967295\00", align 1
+
+declare i32 @strtol(i8*, i8**, i32)
+declare i32 @atoi(i8*)
+declare i32 @atol(i8*)
+declare i32 @atoll(i8*)
+declare i32 @strtoll(i8*, i8**, i32)
+
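+; Same strings as str-int-2.ll, but the libcall prototypes return i32 here.
+; Presumably values that do not fit the declared 32-bit result (and the
+; mismatched atoll/strtoll prototypes) prevent folding, so those calls are
+; expected to remain (see the CHECK lines below).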
+define i32 @strtol_dec() #0 {
+; CHECK-LABEL: @strtol_dec(
+; CHECK-NEXT: ret i32 12
+;
+ %call = call i32 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 10) #2
+ ret i32 %call
+}
+
+define i32 @strtol_base_zero() #0 {
+; CHECK-LABEL: @strtol_base_zero(
+; CHECK-NEXT: ret i32 12
+;
+ %call = call i32 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 0) #2
+ ret i32 %call
+}
+
+define i32 @strtol_hex() #0 {
+; CHECK-LABEL: @strtol_hex(
+; CHECK-NEXT: ret i32 18
+;
+ %call = call i32 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 16) #2
+ ret i32 %call
+}
+
+define i32 @strtol_endptr_not_null() #0 {
+; CHECK-LABEL: @strtol_endptr_not_null(
+; CHECK-NEXT: [[END:%.*]] = alloca i8*, align 4
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtol(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i8** nonnull [[END]], i32 10)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %end = alloca i8*, align 4
+ %call = call i32 @strtol(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %end, i32 10) #2
+ ret i32 %call
+}
+
+define i32 @atoi_test() #0 {
+; CHECK-LABEL: @atoi_test(
+; CHECK-NEXT: ret i32 12
+;
+ %call = call i32 @atoi(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) #4
+ ret i32 %call
+}
+
+define i32 @strtol_not_const_str(i8* %s) #0 {
+; CHECK-LABEL: @strtol_not_const_str(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtol(i8* nocapture [[S:%.*]], i8** null, i32 10)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @strtol(i8* %s, i8** null, i32 10) #3
+ ret i32 %call
+}
+
+define i32 @atoi_not_const_str(i8* %s) #0 {
+; CHECK-LABEL: @atoi_not_const_str(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @atoi(i8* [[S:%.*]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @atoi(i8* %s) #4
+ ret i32 %call
+}
+
+define i32 @strtol_not_const_base(i32 %b) #0 {
+; CHECK-LABEL: @strtol_not_const_base(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtol(i8* nocapture getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i8** null, i32 [[B:%.*]])
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @strtol(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8** null, i32 %b) #2
+ ret i32 %call
+}
+
+define i32 @strtol_long_int() #0 {
+; CHECK-LABEL: @strtol_long_int(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtol(i8* nocapture getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), i8** null, i32 10)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @strtol(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i32 0, i32 0), i8** null, i32 10) #3
+ ret i32 %call
+}
+
+
+define i32 @strtol_big_overflow() #0 {
+; CHECK-LABEL: @strtol_big_overflow(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtol(i8* nocapture getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0), i8** null, i32 10)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @strtol(i8* nocapture getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i32 0, i32 0), i8** null, i32 10) #2
+ ret i32 %call
+}
+
+define i32 @atol_test() #0 {
+; CHECK-LABEL: @atol_test(
+; CHECK-NEXT: ret i32 499496729
+;
+ %call = call i32 @atol(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.6, i32 0, i32 0)) #4
+ ret i32 %call
+}
+
+define i32 @atoll_test() #0 {
+; CHECK-LABEL: @atoll_test(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @atoll(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.5, i64 0, i64 0))
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @atoll(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.5, i32 0, i32 0)) #3
+ ret i32 %call
+}
+
+define i32 @strtoll_test() #0 {
+; CHECK-LABEL: @strtoll_test(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strtoll(i8* nocapture getelementptr inbounds ([11 x i8], [11 x i8]* @.str.7, i64 0, i64 0), i8** null, i32 10)
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %call = call i32 @strtoll(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.7, i32 0, i32 0), i8** null, i32 10) #5
+ ret i32 %call
+}
diff --git a/llvm/test/Transforms/InstCombine/strcat-1.ll b/llvm/test/Transforms/InstCombine/strcat-1.ll
new file mode 100644
index 00000000000..446a26e94ac
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcat-1.ll
@@ -0,0 +1,38 @@
+; Test that the strcat libcall simplifier works correctly, covering the
+; bug found in PR3661.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strcat(i8*, i8*)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i8* @strcat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ ; rslt1 = strcat(target, "hello\00")
+ %arg2 = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strcat(i8* %arg1, i8* %arg2)
+
+ ; rslt2 = strcat(rslt1, "\00")
+ %arg3 = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strcat(i8* %rslt1, i8* %arg3)
+
+ ; rslt3 = strcat(rslt2, "\00hello\00")
+ %arg4 = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strcat(i8* %rslt2, i8* %arg4)
+
+ call i32 @puts( i8* %rslt3 )
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/strcat-2.ll b/llvm/test/Transforms/InstCombine/strcat-2.ll
new file mode 100644
index 00000000000..287019770ac
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcat-2.ll
@@ -0,0 +1,32 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcat(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NOT: call i8* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strcat-3.ll b/llvm/test/Transforms/InstCombine/strcat-3.ll
new file mode 100644
index 00000000000..88cd162d0cd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcat(i8*, i8*)
+
+define void @test_nosimplify1() {
+; CHECK-LABEL: @test_nosimplify1(
+; CHECK: call i16* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strchr-1.ll b/llvm/test/Transforms/InstCombine/strchr-1.ll
new file mode 100644
index 00000000000..4fce378f59f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strchr-1.ll
@@ -0,0 +1,96 @@
+; Test that the strchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@newlines = constant [3 x i8] c"\0D\0A\00"
+@chp = global i8* zeroinitializer
+
+declare i8* @strchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify4(i32 %chr) {
+; CHECK: call i8* @memchr
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify5() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 65280)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+; Check transformation strchr(p, 0) -> p + strlen(p)
+define void @test_simplify6(i8* %str) {
+; CHECK: %strlen = call i32 @strlen(i8* %str)
+; CHECK-NOT: call i8* @strchr
+; CHECK: %strchr = getelementptr i8, i8* %str, i32 %strlen
+; CHECK: store i8* %strchr, i8** @chp, align 4
+; CHECK: ret void
+
+ %dst = call i8* @strchr(i8* %str, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+; Check transformation strchr("\r\n", C) != nullptr -> (C & 9217) != 0
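+; 9217 = 0x2401 has bits 0, 10, and 13 set, i.e. a bitmap of the characters
+; in the searched string ("\0" = 0, "\n" = 10, "\r" = 13). The generated
+; code also checks that the character value is below 16 so the shifted bit
+; stays within the mask's range.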
+define i1 @test_simplify7(i32 %C) {
+; CHECK-LABEL: @test_simplify7
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 %C to i16
+; CHECK-NEXT: [[TRUNC_AND:%.*]] = and i16 [[TRUNC]], 255
+; CHECK-NEXT: %memchr.bounds = icmp ult i16 [[TRUNC_AND]], 16
+; CHECK-NEXT: [[SHL:%.*]] = shl i16 1, [[TRUNC_AND]]
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[SHL]], 9217
+; CHECK-NEXT: %memchr.bits = icmp ne i16 [[AND]], 0
+; CHECK-NEXT: %memchr1 = and i1 %memchr.bounds, %memchr.bits
+; CHECK-NEXT: ret i1 %memchr1
+
+ %dst = call i8* @strchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @newlines, i64 0, i64 0), i32 %C)
+ %cmp = icmp ne i8* %dst, null
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/strchr-2.ll b/llvm/test/Transforms/InstCombine/strchr-2.ll
new file mode 100644
index 00000000000..dd86a16f77b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strcmp-1.ll b/llvm/test/Transforms/InstCombine/strcmp-1.ll
new file mode 100644
index 00000000000..4dfda047280
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcmp-1.ll
@@ -0,0 +1,104 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s --check-prefix=NOBCMP
+; RUN: opt < %s -instcombine -mtriple=unknown-unknown-linux-gnu -S | FileCheck %s --check-prefix=BCMP
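+; Two RUN lines: on linux-gnu, bcmp is available, so equality-only
+; comparisons (test7) can use bcmp; on the default target, memcmp is used
+; instead. Hence the BCMP/NOBCMP prefixes.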
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK-LABEL: @test1(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define i32 @test2(i8* %str1) {
+; CHECK-LABEL: @test2(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define i32 @test5(i1 %b) {
+; CHECK-LABEL: @test5(
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+ %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define i32 @test6(i8* %str) {
+; CHECK-LABEL: @test6(
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strcmp(i8* %str, i8* %str)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) == 0 -> bcmp(x, y, <known length>)
+define i1 @test7(i1 %b) {
+; BCMP-LABEL: @test7(
+; BCMP: %bcmp = call i32 @bcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; BCMP: %res = icmp eq i32 %bcmp, 0
+; BCMP: ret i1 %res
+
+; NOBCMP-LABEL: @test7(
+; NOBCMP: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; NOBCMP: %res = icmp eq i32 %memcmp, 0
+; NOBCMP: ret i1 %res
+
+ %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+ %res = icmp eq i32 %temp3, 0
+ ret i1 %res
+}
diff --git a/llvm/test/Transforms/InstCombine/strcmp-2.ll b/llvm/test/Transforms/InstCombine/strcmp-2.ll
new file mode 100644
index 00000000000..a537b10e0de
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strcmp(i8*, i8*)
+
+define i16 @test_nosimplify() {
+; CHECK-LABEL: @test_nosimplify(
+; CHECK: call i16 @strcmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strcmp(i8* %str1, i8* %str2)
+ ret i16 %temp1
+}
diff --git a/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
new file mode 100644
index 00000000000..092a47fe1db
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcmp-memcmp.ll
@@ -0,0 +1,560 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@key = constant [4 x i8] c"key\00", align 1
+@abc = constant [8 x i8] c"abc\00de\00\00", align 1
+
+declare void @use(i32)
+
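+; When one argument is a constant nul-terminated string of length n and
+; the other pointer is known dereferenceable for at least n+1 bytes,
+; strcmp can be rewritten as memcmp over n+1 bytes with a constant size
+; (strncmp similarly, with the size clamped to its bound).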
+define i32 @strcmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+declare i32 @strcmp(i8* nocapture, i8* nocapture)
+
+define i32 @strcmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp2(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp3(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp ne i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp4(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string)
+ %cmp = icmp ne i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp5([5 x i8]* dereferenceable (5) %buf) {
+; CHECK-LABEL: @strcmp_memcmp5(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [5 x i8], [5 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [5 x i8], [5 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull align 1 %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp6(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp sgt i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp7(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[MEMCMP_LOBIT:%.*]] = lshr i32 [[MEMCMP]], 31
+; CHECK-NEXT: ret i32 [[MEMCMP_LOBIT]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string)
+ %cmp = icmp slt i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp8([4 x i8]* dereferenceable (4) %buf) {
+; CHECK-LABEL: @strcmp_memcmp8(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp9(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
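+; Check that strncmp(s, key, N) with a constant string argument and a
+; sufficiently dereferenceable buffer folds to memcmp, with the length being
+; the smaller of N and the constant string's size (strlen + 1 for the nul).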
+define i32 @strncmp_memcmp([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+declare i32 @strncmp(i8* nocapture, i8* nocapture, i64)
+
+define i32 @strncmp_memcmp2([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp2(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 11)
+ %cmp = icmp ne i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp3([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp3(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 11)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp4([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp4(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 5)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp5([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp5(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+define i32 @strncmp_memcmp6([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp6(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp ne i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp7([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp7(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 4)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp8([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp8(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 3)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 3)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp9([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp9(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp sgt i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp10([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp10(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[MEMCMP_LOBIT:%.*]] = lshr i32 [[MEMCMP]], 31
+; CHECK-NEXT: ret i32 [[MEMCMP_LOBIT]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp slt i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp11([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp11(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 12)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp12([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp12(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 12)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp13([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp13(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 2)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 2)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp14([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp14(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 4)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @abc, i64 0, i64 0), i64 12)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Negative tests
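+; The strcmp/strncmp calls below must stay: the result is compared against a
+; nonzero constant or used directly, the bound is not a constant, the buffer
+; may be too small or of unknown size, the other string is not constant, or
+; the function is built with MemorySanitizer.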
+define i32 @strcmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 3
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp sgt i32 %call, 3
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad2(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 3
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string)
+ %cmp = icmp slt i32 %call, 3
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp_bad3([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad3(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+; CHECK-NEXT: ret i32 [[CALL]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ ret i32 %call
+}
+
+
+define i32 @strcmp_memcmp_bad4(i8* nocapture readonly %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad4(
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* [[BUF:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* %buf)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+define i32 @strcmp_memcmp_bad5([3 x i8]* dereferenceable (3) %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad5(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [3 x i8], [3 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp_bad6([4 x i8]* dereferenceable (4) %buf, i8* nocapture readonly %k) {
+; CHECK-LABEL: @strcmp_memcmp_bad6(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* [[K:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* %k)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp_bad7(i8* nocapture readonly %k) {
+; CHECK-LABEL: @strcmp_memcmp_bad7(
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* [[K:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @strcmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* %k)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strcmp_memcmp_bad8([4 x i8]* dereferenceable (4) %buf) {
+; CHECK-LABEL: @strcmp_memcmp_bad8(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+; CHECK-NEXT: tail call void @use(i32 [[CALL]])
+; CHECK-NEXT: ret i32 0
+;
+ %string = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ tail call void @use(i32 %call)
+ ret i32 0
+}
+
+define i32 @strncmp_memcmp_bad([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp_bad(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 5)
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 3
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp sgt i32 %call, 3
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+define i32 @strncmp_memcmp_bad1([12 x i8]* dereferenceable (12) %buf) {
+; CHECK-LABEL: @strncmp_memcmp_bad1(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 5)
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 3
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 5)
+ %cmp = icmp slt i32 %call, 3
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp_bad2([12 x i8]* dereferenceable (12) %buf, i64 %n) {
+; CHECK-LABEL: @strncmp_memcmp_bad2(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull [[STRING]], i64 [[N:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* nonnull %string, i64 %n)
+ %cmp = icmp slt i32 %call, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp_bad3(i8* nocapture readonly %k) {
+; CHECK-LABEL: @strncmp_memcmp_bad3(
+; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* [[K:%.*]], i64 2)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %call = tail call i32 @strncmp(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i8* %k, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @strncmp_memcmp_bad4([4 x i8]* dereferenceable (4) %buf) {
+; CHECK-LABEL: @strncmp_memcmp_bad4(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [4 x i8], [4 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strncmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
+; CHECK-NEXT: tail call void @use(i32 [[CALL]])
+; CHECK-NEXT: ret i32 0
+;
+ %string = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strncmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0), i64 2)
+ tail call void @use(i32 %call)
+ ret i32 0
+}
+
+define i32 @strcmp_memcmp_msan([12 x i8]* dereferenceable (12) %buf) sanitize_memory {
+; CHECK-LABEL: @strcmp_memcmp_msan(
+; CHECK-NEXT: [[STRING:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[BUF:%.*]], i64 0, i64 0
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(i8* nonnull [[STRING]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+ %string = getelementptr inbounds [12 x i8], [12 x i8]* %buf, i64 0, i64 0
+ %call = call i32 @strcmp(i8* nonnull %string, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @key, i64 0, i64 0))
+ %cmp = icmp eq i32 %call, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
diff --git a/llvm/test/Transforms/InstCombine/strcpy-1.ll b/llvm/test/Transforms/InstCombine/strcpy-1.ll
new file mode 100644
index 00000000000..24c70c18fc0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcpy-1.ll
@@ -0,0 +1,45 @@
+; Test that the strcpy library call simplifier works correctly.
+; rdar://6839935
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
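+; Check strcpy(x, x) -> x.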
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
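+; Check that strcpy with a source of unknown length is not simplified.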
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @strcpy
+ ret i8* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strcpy-2.ll b/llvm/test/Transforms/InstCombine/strcpy-2.ll
new file mode 100644
index 00000000000..cfc8a410b7e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @strcpy
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
new file mode 100644
index 00000000000..859d810d3c4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -0,0 +1,103 @@
+; Test lib call simplification of __strcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where slen >= strlen(src): the call can be simplified to a
+; memcpy of the constant source string (including its nul terminator).
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
+ ret i8* %ret
+}
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret i8* %ret
+}
+
+; Check cases where there are no string constants.
+
+define i8* @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %strcpy = call i8* @strcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0))
+; CHECK-NEXT: ret i8* %strcpy
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret i8* %ret
+}
+
+; Check case where the string length is not constant.
+
+define i8* @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false)
+; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len)
+; CHECK-NEXT: ret i8* %1
+ %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false, i1 false)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where the source and destination are the same.
+
+define i8* @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+
+; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false)
+; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i32 %len)
+; CHECK-NEXT: ret i8* %ret
+ %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false, i1 false, i1 false)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where slen < strlen(src); the call is not simplified.
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0), i32 8)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret i8* %ret
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-2.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-2.ll
new file mode 100644
index 00000000000..c2204a830ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = getelementptr inbounds [60 x i16], [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8], [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-64.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-64.ll
new file mode 100644
index 00000000000..6ff063b2b2b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-64.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
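+; The calls are not simplified; check that the stack buffer argument is
+; marked nonnull, except when null pointers are valid in the function.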
+define void @func(i8* %i) nounwind ssp {
+; CHECK-LABEL: @func(
+; CHECK: @__strcpy_chk(i8* nonnull %arraydecay, i8* %i, i64 32)
+entry:
+ %s = alloca [32 x i8], align 16
+ %arraydecay = getelementptr inbounds [32 x i8], [32 x i8]* %s, i32 0, i32 0
+ %call = call i8* @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
+ call void @func2(i8* %arraydecay)
+ ret void
+}
+
+define void @func_no_null_opt(i8* %i) nounwind ssp #0 {
+; CHECK-LABEL: @func_no_null_opt(
+; CHECK: @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
+entry:
+ %s = alloca [32 x i8], align 16
+ %arraydecay = getelementptr inbounds [32 x i8], [32 x i8]* %s, i32 0, i32 0
+ %call = call i8* @__strcpy_chk(i8* %arraydecay, i8* %i, i64 32)
+ call void @func2(i8* %arraydecay)
+ ret void
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i64) nounwind
+
+declare void @func2(i8*)
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/strcspn-1.ll b/llvm/test/Transforms/InstCombine/strcspn-1.ll
new file mode 100644
index 00000000000..8d441a9eecd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcspn-1.ll
@@ -0,0 +1,57 @@
+; Test that the strcspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@abcba = constant [6 x i8] c"abcba\00"
+@abc = constant [4 x i8] c"abc\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i64 @strcspn(i8*, i8*)
+
+; Check strcspn(s, "") -> strlen(s).
+
+define i64 @test_simplify1(i8* %str) {
+; CHECK-LABEL: @test_simplify1(
+ %pat = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: [[VAR:%[a-z]+]] = call i64 @strlen(i8* %str)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[VAR]]
+}
+
+; Check strcspn("", s) -> 0.
+
+define i64 @test_simplify2(i8* %pat) {
+; CHECK-LABEL: @test_simplify2(
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strcspn(s1, s2), where s1 and s2 are constants.
+
+define i64 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr [6 x i8], [6 x i8]* @abcba, i32 0, i32 0
+ %pat = getelementptr [4 x i8], [4 x i8]* @abc, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strcspn-2.ll b/llvm/test/Transforms/InstCombine/strcspn-2.ll
new file mode 100644
index 00000000000..749860afcd4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcspn-2.ll
@@ -0,0 +1,21 @@
+; Test that the strcspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@null = constant [1 x i8] zeroinitializer
+
+declare double @strcspn(i8*, i8*)
+
+; Check that strcspn functions with the wrong prototype aren't simplified.
+
+define double @test_no_simplify1(i8* %pat) {
+; CHECK-LABEL: @test_no_simplify1(
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call double @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: call double @strcspn
+ ret double %ret
+; CHECK-NEXT: ret double %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strlen-1.ll b/llvm/test/Transforms/InstCombine/strlen-1.ll
new file mode 100644
index 00000000000..aaf1b89945a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strlen-1.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@longer = constant [7 x i8] c"longer\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@nullstring = constant i8 0
+@a = common global [32 x i8] zeroinitializer, align 1
+@null_hello_mid = constant [13 x i8] c"hello wor\00ld\00"
+
+declare i32 @strlen(i8*)
+
+; Check strlen(string constant) -> integer constant.
+
+define i32 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i32 5
+;
+ %hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+define i32 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i32 0
+;
+ %null_p = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %null_l = call i32 @strlen(i8* %null_p)
+ ret i32 %null_l
+}
+
+define i32 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: ret i32 0
+;
+ %null_hello_p = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
+ %null_hello_l = call i32 @strlen(i8* %null_hello_p)
+ ret i32 %null_hello_l
+}
+
+define i32 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i32 0
+;
+ %len = tail call i32 @strlen(i8* @nullstring) nounwind
+ ret i32 %len
+}
+
+; Check strlen(x) == 0 --> *x == 0.
+
+define i1 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i1 false
+;
+ %hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ %eq_hello = icmp eq i32 %hello_l, 0
+ ret i1 %eq_hello
+}
+
+define i1 @test_simplify6(i8* %str_p) {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i8, i8* [[STR_P:%.*]], align 1
+; CHECK-NEXT: [[EQ_NULL:%.*]] = icmp eq i8 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[EQ_NULL]]
+;
+ %str_l = call i32 @strlen(i8* %str_p)
+ %eq_null = icmp eq i32 %str_l, 0
+ ret i1 %eq_null
+}
+
+; Check strlen(x) != 0 --> *x != 0.
+
+define i1 @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: ret i1 true
+;
+ %hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ %ne_hello = icmp ne i32 %hello_l, 0
+ ret i1 %ne_hello
+}
+
+define i1 @test_simplify8(i8* %str_p) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i8, i8* [[STR_P:%.*]], align 1
+; CHECK-NEXT: [[NE_NULL:%.*]] = icmp ne i8 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[NE_NULL]]
+;
+ %str_l = call i32 @strlen(i8* %str_p)
+ %ne_null = icmp ne i32 %str_l, 0
+ ret i1 %ne_null
+}
+
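+; Check strlen(select(x, "hello", "longer")) -> select(x, 5, 6).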
+define i32 @test_simplify9(i1 %x) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 %x, i32 5, i32 6
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %hello = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %longer = getelementptr [7 x i8], [7 x i8]* @longer, i32 0, i32 0
+ %s = select i1 %x, i8* %hello, i8* %longer
+ %l = call i32 @strlen(i8* %s)
+ ret i32 %l
+}
+
+; Check the case that should be simplified to a sub instruction.
+; strlen(@hello + x) --> 5 - x
+
+define i32 @test_simplify10(i32 %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 5, %x
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %hello_p = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+; strlen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
+
+define i32 @test_simplify11(i32 %x) {
+; CHECK-LABEL: @test_simplify11(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 7
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 9, [[AND]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 7
+ %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+; Check cases that shouldn't be simplified.
+
+define i32 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[A_L:%.*]] = call i32 @strlen(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0))
+; CHECK-NEXT: ret i32 [[A_L]]
+;
+ %a_p = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %a_l = call i32 @strlen(i8* %a_p)
+ ret i32 %a_l
+}
+
+; strlen(@null_hello + x) should not be simplified to a sub instruction.
+
+define i32 @test_no_simplify2(i32 %x) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+define i32 @test_no_simplify2_no_null_opt(i32 %x) #0 {
+; CHECK-LABEL: @test_no_simplify2_no_null_opt(
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+; strlen(@null_hello_mid + (x & 15)) should not be simplified to a sub instruction.
+
+define i32 @test_no_simplify3(i32 %x) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 15
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 [[AND]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %and = and i32 %x, 15
+ %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+define i32 @test_no_simplify3_on_null_opt(i32 %x) #0 {
+; CHECK-LABEL: @test_no_simplify3_on_null_opt(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 15
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 [[AND]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %and = and i32 %x, 15
+ %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/strlen-2.ll b/llvm/test/Transforms/InstCombine/strlen-2.ll
new file mode 100644
index 00000000000..df5eee0caef
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strlen-2.ll
@@ -0,0 +1,18 @@
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+
+declare i32 @strlen(i8*, i32)
+
+define i32 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p, i32 187)
+; CHECK-NEXT: %hello_l = call i32 @strlen
+ ret i32 %hello_l
+; CHECK-NEXT: ret i32 %hello_l
+}
diff --git a/llvm/test/Transforms/InstCombine/strncat-1.ll b/llvm/test/Transforms/InstCombine/strncat-1.ll
new file mode 100644
index 00000000000..4b8da81f6fb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncat-1.ll
@@ -0,0 +1,37 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strncat(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i8* @strncat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ ; rslt1 = strncat(target, "hello\00", 6)
+ %arg2 = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strncat(i8* %arg1, i8* %arg2, i32 6)
+
+ ; rslt2 = strncat(rslt1, "\00", 42)
+ %arg3 = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strncat(i8* %rslt1, i8* %arg3, i32 42)
+
+ ; rslt3 = strncat(rslt2, "\00hello\00", 42)
+ %arg4 = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strncat(i8* %rslt2, i8* %arg4, i32 42)
+
+ call i32 @puts(i8* %rslt3)
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/InstCombine/strncat-2.ll b/llvm/test/Transforms/InstCombine/strncat-2.ll
new file mode 100644
index 00000000000..3533e1eace7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncat-2.ll
@@ -0,0 +1,53 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncat(i8*, i8*, i32)
+
+define void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NOT: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 0)
+ ret void
+}
+
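+; Check that the call is not simplified when the bound is smaller than the
+; source string length.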
+define void @test_nosimplify1() {
+; CHECK-LABEL: @test_nosimplify1(
+; CHECK: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 1)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strncat-3.ll b/llvm/test/Transforms/InstCombine/strncat-3.ll
new file mode 100644
index 00000000000..0f6964df915
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncat(i8*, i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK-LABEL: @test_nosimplify1(
+; CHECK: call i16* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strncmp-1.ll b/llvm/test/Transforms/InstCombine/strncmp-1.ll
new file mode 100644
index 00000000000..a1121821fdf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncmp-1.ll
@@ -0,0 +1,99 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strncmp(i8*, i8*, i32)
+
+; strncmp("", x, n) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK-LABEL: @test1(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, "", n) -> *x
+define i32 @test2(i8* %str1) {
+; CHECK-LABEL: @test2(
+; CHECK: %strcmpload = load i8, i8* %str1
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, y, n) -> cnst
+define i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
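+; strncmp("hell", "", 10) -> 1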
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
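+; strncmp("hell", "hello", 4) -> 0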
+define i32 @test5() {
+; CHECK-LABEL: @test5(
+; CHECK: ret i32 0
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 4)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,1) -> memcmp(x,y,1)
+define i32 @test6(i8* %str1, i8* %str2) {
+; CHECK-LABEL: @test6(
+; CHECK: [[LOAD1:%[a-z]+]] = load i8, i8* %str1, align 1
+; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
+; CHECK: [[LOAD2:%[a-z]+]] = load i8, i8* %str2, align 1
+; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: ret i32 [[RET]]
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,0) -> 0
+define i32 @test7(i8* %str1, i8* %str2) {
+; CHECK-LABEL: @test7(
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
+ ret i32 %temp1
+}
+
+; strncmp(x,x,n) -> 0
+define i32 @test8(i8* %str, i32 %n) {
+; CHECK-LABEL: @test8(
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
+ ret i32 %temp1
+}
diff --git a/llvm/test/Transforms/InstCombine/strncmp-2.ll b/llvm/test/Transforms/InstCombine/strncmp-2.ll
new file mode 100644
index 00000000000..9e19781e04c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strncmp(i8*, i8*, i32)
+
+define i16 @test_nosimplify() {
+; CHECK-LABEL: @test_nosimplify(
+; CHECK: call i16 @strncmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i16 %temp1
+}
diff --git a/llvm/test/Transforms/InstCombine/strncmp-wrong-datalayout.ll b/llvm/test/Transforms/InstCombine/strncmp-wrong-datalayout.ll
new file mode 100644
index 00000000000..5c7c49a9708
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncmp-wrong-datalayout.ll
@@ -0,0 +1,16 @@
+; Test that the strncmp simplification doesn't crash if the datalayout
+; specifies 64-bit pointers while the length is a 32-bit argument.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i32 @strncmp(i8*, i8*, i32)
+
+define i32 @test6(i8* %str1, i8* %str2) {
+; CHECK-LABEL: @test6(
+; CHECK: call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+ ret i32 %temp1
+}
diff --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll
new file mode 100644
index 00000000000..28cb26b1704
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll
@@ -0,0 +1,95 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncpy(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+; Check a bunch of strncpy invocations together.
+
+define i32 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NOT: call i8* @strncpy
+; CHECK: call i32 @puts
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ %arg2 = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strncpy(i8* %arg1, i8* %arg2, i32 6)
+
+ %arg3 = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strncpy(i8* %rslt1, i8* %arg3, i32 42)
+
+ %arg4 = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strncpy(i8* %rslt2, i8* %arg4, i32 42)
+
+ call i32 @puts( i8* %rslt3 )
+ ret i32 0
+}
+
+; Check strncpy(x, "", y) -> memset(x, '\0', y, 1).
+
+define void @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call void @llvm.memset.p0i8.i32
+ ret void
+}
+
+; Check strncpy(x, y, 0) -> x.
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ %ret = call i8* @strncpy(i8* %dst, i8* %src, i32 0)
+ ret i8* %ret
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+}
+
+; Check strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant].
+
+define void @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+; Check cases that shouldn't be simplified.
+
+define void @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call i8* @strncpy
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK-LABEL: @test_no_simplify2(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 8)
+; CHECK: call i8* @strncpy
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strncpy-2.ll b/llvm/test/Transforms/InstCombine/strncpy-2.ll
new file mode 100644
index 00000000000..5c45f9f24ae
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncpy(i8*, i8*, i32)
+
+; Check that 'strncpy' functions with the wrong prototype aren't simplified.
+
+define void @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call i16* @strncpy
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
new file mode 100644
index 00000000000..bc3ff35ef05
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -0,0 +1,71 @@
+; Test lib call simplification of __strncpy_chk calls with various values
+; for len and dstlen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where dstlen >= len
+
+define i8* @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+ %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
+ %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
+ ret i8* %ret
+}
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %strncpy = call i8* @strncpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0), i32 12)
+; CHECK-NEXT: ret i8* %strncpy
+ %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+ ret i8* %ret
+}
+
+; Check cases where dstlen < len
+
+define i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: %ret = call i8* @__strncpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 8, i32 4)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify2() {
+; CHECK-LABEL: @test_no_simplify2(
+ %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: %ret = call i8* @__strncpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i32 0, i32 0), i32 8, i32 0)
+; CHECK-NEXT: ret i8* %ret
+ %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
+ ret i8* %ret
+}
+
+declare i8* @__strncpy_chk(i8*, i8*, i32, i32)
diff --git a/llvm/test/Transforms/InstCombine/strncpy_chk-2.ll b/llvm/test/Transforms/InstCombine/strncpy_chk-2.ll
new file mode 100644
index 00000000000..89ecd46c04a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strncpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@b = common global [60 x i16] zeroinitializer, align 1
+
+define void @test_no_simplify() {
+; CHECK-LABEL: @test_no_simplify(
+ %dst = getelementptr inbounds [60 x i16], [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i16], [60 x i16]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strncpy_chk
+ call i16* @__strncpy_chk(i16* %dst, i16* %src, i32 60, i32 60)
+ ret void
+}
+
+declare i16* @__strncpy_chk(i16*, i16*, i32, i32)
diff --git a/llvm/test/Transforms/InstCombine/strpbrk-1.ll b/llvm/test/Transforms/InstCombine/strpbrk-1.ll
new file mode 100644
index 00000000000..a61100deb87
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strpbrk-1.ll
@@ -0,0 +1,68 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i8* @strpbrk(i8*, i8*)
+
+; Check strpbrk(s, "") -> NULL.
+
+define i8* @test_simplify1(i8* %str) {
+; CHECK-LABEL: @test_simplify1(
+ %pat = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk("", s) -> NULL.
+
+define i8* @test_simplify2(i8* %pat) {
+; CHECK-LABEL: @test_simplify2(
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk(s1, s2), where s1 and s2 are constants.
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr [12 x i8], [12 x i8]* @hello, i32 0, i32 0
+ %pat = getelementptr [2 x i8], [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* getelementptr inbounds ([12 x i8], [12 x i8]* @hello, i32 0, i32 6)
+}
+
+; Check strpbrk(s, "a") -> strchr(s, 'a').
+
+define i8* @test_simplify4(i8* %str) {
+; CHECK-LABEL: @test_simplify4(
+ %pat = getelementptr [2 x i8], [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: [[VAR:%[a-z]+]] = call i8* @strchr(i8* %str, i32 119)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* [[VAR]]
+}
+
+; Check cases that shouldn't be simplified.
+
+define i8* @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strpbrk-2.ll b/llvm/test/Transforms/InstCombine/strpbrk-2.ll
new file mode 100644
index 00000000000..0af6faf40bc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strpbrk-2.ll
@@ -0,0 +1,23 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+
+declare i16* @strpbrk(i8*, i8*)
+
+; Check that 'strpbrk' functions with the wrong prototype aren't simplified.
+
+define i16* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %str = getelementptr [12 x i8], [12 x i8]* @hello, i32 0, i32 0
+ %pat = getelementptr [2 x i8], [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i16* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i16* @strpbrk
+ ret i16* %ret
+; CHECK-NEXT: ret i16* %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strrchr-1.ll b/llvm/test/Transforms/InstCombine/strrchr-1.ll
new file mode 100644
index 00000000000..3ae68fba96d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strrchr-1.ll
@@ -0,0 +1,65 @@
+; Test that the strrchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strrchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify4() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 65280)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_nosimplify1(i32 %chr) {
+; CHECK-LABEL: @test_nosimplify1(
+; CHECK: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strrchr-2.ll b/llvm/test/Transforms/InstCombine/strrchr-2.ll
new file mode 100644
index 00000000000..4c203d0e8e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strrchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strrchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strrchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strrchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/strspn-1.ll b/llvm/test/Transforms/InstCombine/strspn-1.ll
new file mode 100644
index 00000000000..3d9c573ac61
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strspn-1.ll
@@ -0,0 +1,56 @@
+; Test that the strspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@abcba = constant [6 x i8] c"abcba\00"
+@abc = constant [4 x i8] c"abc\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i64 @strspn(i8*, i8*)
+
+; Check strspn(s, "") -> 0.
+
+define i64 @test_simplify1(i8* %str) {
+; CHECK-LABEL: @test_simplify1(
+ %pat = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strspn("", s) -> 0.
+
+define i64 @test_simplify2(i8* %pat) {
+; CHECK-LABEL: @test_simplify2(
+ %str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strspn(s1, s2), where s1 and s2 are constants.
+
+define i64 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr [6 x i8], [6 x i8]* @abcba, i32 0, i32 0
+ %pat = getelementptr [4 x i8], [4 x i8]* @abc, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 5
+}
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK-LABEL: @test_no_simplify1(
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strstr-1.ll b/llvm/test/Transforms/InstCombine/strstr-1.ll
new file mode 100644
index 00000000000..d57e56ccdaf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strstr-1.ll
@@ -0,0 +1,65 @@
+; Test that the strstr library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@.str = private constant [1 x i8] zeroinitializer
+@.str1 = private constant [2 x i8] c"a\00"
+@.str2 = private constant [6 x i8] c"abcde\00"
+@.str3 = private constant [4 x i8] c"bcd\00"
+
+declare i8* @strstr(i8*, i8*)
+
+; Check strstr(str, "") -> str.
+
+define i8* @test_simplify1(i8* %str) {
+; CHECK-LABEL: @test_simplify1(
+ %pat = getelementptr inbounds [1 x i8], [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %str
+}
+
+; Check strstr(str, "a") -> strchr(str, 'a').
+
+define i8* @test_simplify2(i8* %str) {
+; CHECK-LABEL: @test_simplify2(
+ %pat = getelementptr inbounds [2 x i8], [2 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: @strchr(i8* %str, i32 97)
+}
+
+; Check strstr("abcde", "bcd") -> "abcde" + 1.
+
+define i8* @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %str = getelementptr inbounds [6 x i8], [6 x i8]* @.str2, i32 0, i32 0
+ %pat = getelementptr inbounds [4 x i8], [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: getelementptr inbounds ([6 x i8], [6 x i8]* @.str2, i64 0, i64 1)
+}
+
+; Check strstr(str, str) -> str.
+
+define i8* @test_simplify4(i8* %str) {
+; CHECK-LABEL: @test_simplify4(
+ %ret = call i8* @strstr(i8* %str, i8* %str)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %str
+}
+
+; Check strstr(str, pat) == str -> strncmp(str, pat, strlen(pat)) == 0.
+
+define i1 @test_simplify5(i8* %str, i8* %pat) {
+; CHECK-LABEL: @test_simplify5(
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ %cmp = icmp eq i8* %ret, %str
+ ret i1 %cmp
+; CHECK: [[LEN:%[a-z]+]] = call {{i[0-9]+}} @strlen(i8* %pat)
+; CHECK: [[NCMP:%[a-z]+]] = call {{i[0-9]+}} @strncmp(i8* %str, i8* %pat, {{i[0-9]+}} [[LEN]])
+; CHECK: icmp eq {{i[0-9]+}} [[NCMP]], 0
+; CHECK: ret i1
+}
diff --git a/llvm/test/Transforms/InstCombine/strstr-2.ll b/llvm/test/Transforms/InstCombine/strstr-2.ll
new file mode 100644
index 00000000000..9cb33115c2e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strstr-2.ll
@@ -0,0 +1,18 @@
+; Test that the strstr library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@null = private constant [1 x i8] zeroinitializer
+
+declare i8 @strstr(i8*, i8*)
+
+define i8 @test_no_simplify1(i8* %str) {
+; CHECK-LABEL: @test_no_simplify1(
+ %pat = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+ %ret = call i8 @strstr(i8* %str, i8* %pat)
+; CHECK-NEXT: call i8 @strstr
+ ret i8 %ret
+; CHECK-NEXT: ret i8 %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/strto-1.ll b/llvm/test/Transforms/InstCombine/strto-1.ll
new file mode 100644
index 00000000000..96f36e8d89c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strto-1.ll
@@ -0,0 +1,82 @@
+; Test that the strto* library call simplifiers work correctly.
+;
+; RUN: opt < %s -instcombine -inferattrs -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtol(i8* readonly, i8** nocapture, i32)
+
+declare double @strtod(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare double @strtod(i8* readonly, i8** nocapture, i32)
+
+declare float @strtof(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare float @strtof(i8* readonly, i8** nocapture, i32)
+
+declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoul(i8* readonly, i8** nocapture, i32)
+
+declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoll(i8* readonly, i8** nocapture, i32)
+
+declare double @strtold(i8* %s, i8** %endptr)
+; CHECK: declare double @strtold(i8* readonly, i8** nocapture)
+
+declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoull(i8* readonly, i8** nocapture, i32)
+
+define void @test_simplify1(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify1(
+ call i64 @strtol(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify2(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify2(
+ call double @strtod(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call double @strtod(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify3(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify3(
+ call float @strtof(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call float @strtof(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify4(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify4(
+ call i64 @strtoul(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoul(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify5(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify5(
+ call i64 @strtoll(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoll(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify6(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify6(
+ call double @strtold(i8* %x, i8** null)
+; CHECK-NEXT: call double @strtold(i8* nocapture %x, i8** null)
+ ret void
+}
+
+define void @test_simplify7(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_simplify7(
+ call i64 @strtoull(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoull(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_no_simplify1(i8* %x, i8** %endptr) {
+; CHECK-LABEL: @test_no_simplify1(
+ call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa-new.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-new.ll
new file mode 100644
index 00000000000..d3f319523ce
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-new.ll
@@ -0,0 +1,53 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+;
+; Verify that instcombine preserves TBAA tags when converting a memcpy into
+; a scalar load and store.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+%A = type { float }
+
+define void @test1(%A* %a1, %A* %a2) {
+entry:
+; CHECK-LABEL: @test1
+; CHECK: %[[LOAD:.*]] = load i32, {{.*}}, !tbaa [[TAG_A:!.*]]
+; CHECK: store i32 %[[LOAD]], {{.*}}, !tbaa [[TAG_A]]
+; CHECK: ret
+ %0 = bitcast %A* %a1 to i8*
+ %1 = bitcast %A* %a2 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false), !tbaa !4 ; TAG_A
+ ret void
+}
+
+%B = type { i32 (i8*, i32*, double*)** }
+
+define i32 (i8*, i32*, double*)*** @test2() {
+; CHECK-LABEL: @test2
+; CHECK-NOT: memcpy
+; CHECK: ret
+ %tmp = alloca %B, align 8
+ %tmp1 = bitcast %B* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp1, i8* align 8 undef, i64 8, i1 false), !tbaa !7 ; TAG_B
+ %tmp2 = getelementptr %B, %B* %tmp, i32 0, i32 0
+ %tmp3 = load i32 (i8*, i32*, double*)**, i32 (i8*, i32*, double*)*** %tmp2
+ ret i32 (i8*, i32*, double*)*** %tmp2
+}
+
+!0 = !{!"root"}
+!1 = !{!0, !"char"}
+!2 = !{!1, !"float"}
+!3 = !{!1, i64 4, !"A", !2, i64 0, i64 4}
+!4 = !{!3, !3, i64 0, i64 4}
+!5 = !{!1, !"pointer"}
+!6 = !{!1, i64 8, !"B", !5, i64 0, i64 8}
+!7 = !{!6, !6, i64 0, i64 8}
+
+; CHECK-DAG: [[ROOT:!.*]] = !{!"root"}
+; CHECK-DAG: [[TYPE_char:!.*]] = !{[[ROOT]], !"char"}
+; CHECK-DAG: [[TYPE_float:!.*]] = !{[[TYPE_char]], !"float"}
+; CHECK-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"A", [[TYPE_float]], i64 0, i64 4}
+; CHECK-DAG: [[TAG_A]] = !{[[TYPE_A]], [[TYPE_A]], i64 0, i64 4}
+; Note that the memcpy() call in test2() transforms into an
+; undecorated 'store undef', so TAG_B is not present in the output.
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
new file mode 100644
index 00000000000..09a3f435e8f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -0,0 +1,46 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; Verify that instcombine preserves TBAA tags when converting a memcpy into
+; a scalar load and store.
+
+%struct.test1 = type { float }
+
+; CHECK: @test
+; CHECK: %[[LOAD:.*]] = load i32, i32* %{{.*}}, align 4, !tbaa !0
+; CHECK: store i32 %[[LOAD:.*]], i32* %{{.*}}, align 4, !tbaa !0
+; CHECK: ret
+define void @test1(%struct.test1* nocapture %a, %struct.test1* nocapture %b) {
+entry:
+ %0 = bitcast %struct.test1* %a to i8*
+ %1 = bitcast %struct.test1* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false), !tbaa.struct !3
+ ret void
+}
+
+%struct.test2 = type { i32 (i8*, i32*, double*)** }
+
+define i32 (i8*, i32*, double*)*** @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: memcpy
+; CHECK: ret
+ %tmp = alloca %struct.test2, align 8
+ %tmp1 = bitcast %struct.test2* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp1, i8* align 8 undef, i64 8, i1 false), !tbaa.struct !4
+ %tmp2 = getelementptr %struct.test2, %struct.test2* %tmp, i32 0, i32 0
+ %tmp3 = load i32 (i8*, i32*, double*)**, i32 (i8*, i32*, double*)*** %tmp2
+ ret i32 (i8*, i32*, double*)*** %tmp2
+}
+
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+
+!0 = !{!"Simple C/C++ TBAA"}
+!1 = !{!"omnipotent char", !0}
+!2 = !{!5, !5, i64 0}
+!3 = !{i64 0, i64 4, !2}
+!4 = !{i64 0, i64 8, null}
+!5 = !{!"float", !0}
diff --git a/llvm/test/Transforms/InstCombine/sub-minmax.ll b/llvm/test/Transforms/InstCombine/sub-minmax.ll
new file mode 100644
index 00000000000..ccc3483ce95
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-minmax.ll
@@ -0,0 +1,355 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @max_na_b_minux_na(i32 %A, i32 %B) {
+; CHECK-LABEL: @max_na_b_minux_na(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 [[B]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, %B
+ %l1 = select i1 %l0, i32 %not, i32 %B
+ %x = sub i32 %l1, %not
+ ret i32 %x
+}
+
+define i32 @na_minus_max_na_b(i32 %A, i32 %B) {
+; CHECK-LABEL: @na_minus_max_na_b(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 [[B]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, %B
+ %l1 = select i1 %l0, i32 %not, i32 %B
+ %x = sub i32 %not, %l1
+ ret i32 %x
+}
+
+define i32 @max_b_na_minus_na(i32 %A, i32 %B) {
+; CHECK-LABEL: @max_b_na_minus_na(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ugt i32 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[B]], i32 [[NOT]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ugt i32 %not, %B
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %l1, %not
+ ret i32 %x
+}
+
+define i32 @na_minus_max_b_na(i32 %A, i32 %B) {
+; CHECK-LABEL: @na_minus_max_b_na(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ugt i32 [[NOT]], [[B:%.*]]
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[B]], i32 [[NOT]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ugt i32 %not, %B
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %not, %l1
+ ret i32 %x
+}
+
+
+define i32 @max_na_bi_minux_na(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_na_bi_minux_na(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %B = xor i32 %Bi, -1
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, %B
+ %l1 = select i1 %l0, i32 %not, i32 %B
+ %x = sub i32 %l1, %not
+ ret i32 %x
+}
+
+define i32 @na_minus_max_na_bi(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_na_bi(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %B = xor i32 %Bi, -1
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, %B
+ %l1 = select i1 %l0, i32 %not, i32 %B
+ %x = sub i32 %not, %l1
+ ret i32 %x
+}
+
+define i32 @max_bi_na_minus_na(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_bi_na_minus_na(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %B = xor i32 %Bi, -1
+ %not = xor i32 %A, -1
+ %l0 = icmp ugt i32 %not, %B
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %l1, %not
+ ret i32 %x
+}
+
+define i32 @na_minus_max_bi_na(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_bi_na(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %B = xor i32 %Bi, -1
+ %not = xor i32 %A, -1
+ %l0 = icmp ugt i32 %not, %B
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %not, %l1
+ ret i32 %x
+}
+
+
+define i32 @max_na_bi_minux_na_use(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_na_bi_minux_na_use(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, 31
+ %l1 = select i1 %l0, i32 %not, i32 31
+ %x = sub i32 %l1, %not
+ call void @use32(i32 %l1)
+ ret i32 %x
+}
+
+define i32 @na_minus_max_na_bi_use(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_na_bi_use(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, 31
+ %l1 = select i1 %l0, i32 %not, i32 31
+ %x = sub i32 %not, %l1
+ call void @use32(i32 %l1)
+ ret i32 %x
+}
+
+define i32 @max_bi_na_minus_na_use(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_bi_na_minus_na_use(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %B = xor i32 %Bi, -1
+ %l0 = icmp ult i32 %B, %not
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %l1, %not
+ call void @use32(i32 %l1)
+ ret i32 %x
+}
+
+define i32 @na_minus_max_bi_na_use(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_bi_na_use(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A:%.*]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %B = xor i32 %Bi, -1
+ %l0 = icmp ult i32 %B, %not
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %not, %l1
+ call void @use32(i32 %l1)
+ ret i32 %x
+}
+
+
+define i32 @max_na_bi_minux_na_use2(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_na_bi_minux_na_use2(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: call void @use32(i32 [[NOT]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, 31
+ %l1 = select i1 %l0, i32 %not, i32 31
+ %x = sub i32 %l1, %not
+ call void @use32(i32 %l1)
+ call void @use32(i32 %not)
+ ret i32 %x
+}
+
+define i32 @na_minus_max_na_bi_use2(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_na_bi_use2(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31
+; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: call void @use32(i32 [[NOT]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %l0 = icmp ult i32 %not, 31
+ %l1 = select i1 %l0, i32 %not, i32 31
+ %x = sub i32 %not, %l1
+ call void @use32(i32 %l1)
+ call void @use32(i32 %not)
+ ret i32 %x
+}
+
+define i32 @max_bi_na_minus_na_use2(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @max_bi_na_minus_na_use2(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: call void @use32(i32 [[NOT]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %B = xor i32 %Bi, -1
+ %l0 = icmp ult i32 %B, %not
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %l1, %not
+ call void @use32(i32 %l1)
+ call void @use32(i32 %not)
+ ret i32 %x
+}
+
+define i32 @na_minus_max_bi_na_use2(i32 %A, i32 %Bi) {
+; CHECK-LABEL: @na_minus_max_bi_na_use2(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[BI:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
+; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
+; CHECK-NEXT: call void @use32(i32 [[L1]])
+; CHECK-NEXT: call void @use32(i32 [[NOT]])
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %not = xor i32 %A, -1
+ %B = xor i32 %Bi, -1
+ %l0 = icmp ult i32 %B, %not
+ %l1 = select i1 %l0, i32 %B, i32 %not
+ %x = sub i32 %not, %l1
+ call void @use32(i32 %l1)
+ call void @use32(i32 %not)
+ ret i32 %x
+}
+
+define i8 @umin_not_sub(i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_not_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]]
+; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1
+; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP2]], [[X]]
+; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP2]], [[Y]]
+; CHECK-NEXT: call void @use8(i8 [[SUBX]])
+; CHECK-NEXT: call void @use8(i8 [[SUBY]])
+; CHECK-NEXT: ret i8 [[MINXY]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %cmpxy = icmp ult i8 %nx, %ny
+ %minxy = select i1 %cmpxy, i8 %nx, i8 %ny
+ %subx = sub i8 %nx, %minxy
+ %suby = sub i8 %ny, %minxy
+ call void @use8(i8 %subx)
+ call void @use8(i8 %suby)
+ ret i8 %minxy
+}
+
+define i8 @umin_not_sub_rev(i8 %x, i8 %y) {
+; CHECK-LABEL: @umin_not_sub_rev(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]]
+; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1
+; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP2]]
+; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP2]]
+; CHECK-NEXT: call void @use8(i8 [[SUBX]])
+; CHECK-NEXT: call void @use8(i8 [[SUBY]])
+; CHECK-NEXT: ret i8 [[MINXY]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %cmpxy = icmp ult i8 %nx, %ny
+ %minxy = select i1 %cmpxy, i8 %nx, i8 %ny
+ %subx = sub i8 %minxy, %nx
+ %suby = sub i8 %minxy, %ny
+ call void @use8(i8 %subx)
+ call void @use8(i8 %suby)
+ ret i8 %minxy
+}
+
+define void @umin3_not_all_ops_extra_uses_invert_subs(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @umin3_not_all_ops_extra_uses_invert_subs(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]]
+; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], -1
+; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP4]], [[X]]
+; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP4]], [[Y]]
+; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP4]], [[Z]]
+; CHECK-NEXT: call void @use8(i8 [[TMP5]])
+; CHECK-NEXT: call void @use8(i8 [[XMIN]])
+; CHECK-NEXT: call void @use8(i8 [[YMIN]])
+; CHECK-NEXT: call void @use8(i8 [[ZMIN]])
+; CHECK-NEXT: ret void
+;
+ %xn = xor i8 %x, -1
+ %yn = xor i8 %y, -1
+ %zn = xor i8 %z, -1
+ %cmpxz = icmp ult i8 %xn, %zn
+ %minxz = select i1 %cmpxz, i8 %xn, i8 %zn
+ %cmpxyz = icmp ult i8 %minxz, %yn
+ %minxyz = select i1 %cmpxyz, i8 %minxz, i8 %yn
+ %xmin = sub i8 %xn, %minxyz
+ %ymin = sub i8 %yn, %minxyz
+ %zmin = sub i8 %zn, %minxyz
+ call void @use8(i8 %minxyz)
+ call void @use8(i8 %xmin)
+ call void @use8(i8 %ymin)
+ call void @use8(i8 %zmin)
+ ret void
+}
+
+declare void @use8(i8)
+declare void @use32(i32 %u)
diff --git a/llvm/test/Transforms/InstCombine/sub-not.ll b/llvm/test/Transforms/InstCombine/sub-not.ll
new file mode 100644
index 00000000000..cd1f8f3bd52
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-not.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(i8)
+
+define i8 @sub_not(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_not(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = sub i8 %x, %y
+ %r = xor i8 %s, -1
+ ret i8 %r
+}
+
+define i8 @sub_not_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_not_extra_use(
+; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[S]], -1
+; CHECK-NEXT: call void @use(i8 [[S]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = sub i8 %x, %y
+ %r = xor i8 %s, -1
+ call void @use(i8 %s)
+ ret i8 %r
+}
+
+define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sub_not_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %s = sub <2 x i8> %x, %y
+ %r = xor <2 x i8> %s, <i8 -1, i8 undef>
+ ret <2 x i8> %r
+}
+
+define i8 @dec_sub(i8 %x, i8 %y) {
+; CHECK-LABEL: @dec_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = sub i8 %x, %y
+ %r = add i8 %s, -1
+ ret i8 %r
+}
+
+define i8 @dec_sub_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @dec_sub_extra_use(
+; CHECK-NEXT: [[S:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[S]], -1
+; CHECK-NEXT: call void @use(i8 [[S]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = sub i8 %x, %y
+ %r = add i8 %s, -1
+ call void @use(i8 %s)
+ ret i8 %r
+}
+
+define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @dec_sub_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %s = sub <2 x i8> %x, %y
+ %r = add <2 x i8> %s, <i8 -1, i8 undef>
+ ret <2 x i8> %r
+}
+
+define i8 @sub_inc(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_inc(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, 1
+ %r = sub i8 %y, %s
+ ret i8 %r
+}
+
+define i8 @sub_inc_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_inc_extra_use(
+; CHECK-NEXT: [[S:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = sub i8 [[Y:%.*]], [[S]]
+; CHECK-NEXT: call void @use(i8 [[S]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, 1
+ %r = sub i8 %y, %s
+ call void @use(i8 %s)
+ ret i8 %r
+}
+
+define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sub_inc_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %s = add <2 x i8> %x, <i8 undef, i8 1>
+ %r = sub <2 x i8> %y, %s
+ ret <2 x i8> %r
+}
+
+define i8 @sub_dec(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, -1
+ %r = sub i8 %s, %y
+ ret i8 %r
+}
+
+define i8 @sub_dec_extra_use(i8 %x, i8 %y) {
+; CHECK-LABEL: @sub_dec_extra_use(
+; CHECK-NEXT: [[S:%.*]] = add i8 [[X:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = sub i8 [[S]], [[Y:%.*]]
+; CHECK-NEXT: call void @use(i8 [[S]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %s = add i8 %x, -1
+ %r = sub i8 %s, %y
+ call void @use(i8 %s)
+ ret i8 %r
+}
+
+define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @sub_dec_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ %s = add <2 x i8> %x, <i8 undef, i8 -1>
+ %r = sub <2 x i8> %s, %y
+ ret <2 x i8> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
new file mode 100644
index 00000000000..dda5c748540
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 31
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[AND]], 63
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %and = and i32 %x, 31
+ %sub = sub i32 63, %and
+ ret i32 %sub
+}
+
+define <2 x i32> @test1vec(<2 x i32> %x) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i32> [[AND]], <i32 63, i32 63>
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %and = and <2 x i32> %x, <i32 31, i32 31>
+ %sub = sub <2 x i32> <i32 63, i32 63>, %and
+ ret <2 x i32> %sub
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[COUNT]], 31
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+ %sub = sub i32 31, %count
+ ret i32 %sub
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 31
+; CHECK-NEXT: [[ADD:%.*]] = sub nuw nsw i32 73, [[AND]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %and = and i32 %x, 31
+ %sub = xor i32 31, %and
+ %add = add i32 %sub, 42
+ ret i32 %add
+}
+
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
new file mode 100644
index 00000000000..6e1f34868f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -0,0 +1,1294 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @sub_constant(i32 %x) {
+; CHECK-LABEL: @sub_constant(
+; CHECK-NEXT: [[R:%.*]] = add i32 [[X:%.*]], -42
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = sub i32 %x, 42
+ ret i32 %r
+}
+
+@g = global i32 0
+
+define i32 @sub_constant_expression(i32 %x) {
+; CHECK-LABEL: @sub_constant_expression(
+; CHECK-NEXT: [[R:%.*]] = sub i32 [[X:%.*]], ptrtoint (i32* @g to i32)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = sub i32 %x, ptrtoint (i32* @g to i32)
+ ret i32 %r
+}
+
+define <2 x i32> @sub_constant_vec(<2 x i32> %x) {
+; CHECK-LABEL: @sub_constant_vec(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[X:%.*]], <i32 -42, i32 12>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = sub <2 x i32> %x, <i32 42, i32 -12>
+ ret <2 x i32> %r
+}
+
+define <3 x i33> @sub_constant_vec_weird_type(<3 x i33> %x) {
+; CHECK-LABEL: @sub_constant_vec_weird_type(
+; CHECK-NEXT: [[R:%.*]] = add <3 x i33> [[X:%.*]], <i33 42, i33 -42, i33 12>
+; CHECK-NEXT: ret <3 x i33> [[R]]
+;
+ %r = sub <3 x i33> %x, <i33 -42, i33 42, i33 -12>
+ ret <3 x i33> %r
+}
+
+define <4 x i32> @sub_constant_expression_vec(<4 x i32> %x) {
+; CHECK-LABEL: @sub_constant_expression_vec(
+; CHECK-NEXT: [[R:%.*]] = sub <4 x i32> [[X:%.*]], bitcast (i128 ptrtoint (i32* @g to i128) to <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %r = sub <4 x i32> %x, bitcast (i128 ptrtoint (i32* @g to i128) to <4 x i32>)
+ ret <4 x i32> %r
+}
+
+define i32 @neg_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @neg_sub(
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %neg = sub i32 0, %x
+ %r = sub i32 %y, %neg
+ ret i32 %r
+}
+
+define i32 @neg_nsw_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @neg_nsw_sub(
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %neg = sub nsw i32 0, %x
+ %r = sub i32 %y, %neg
+ ret i32 %r
+}
+
+define i32 @neg_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @neg_sub_nsw(
+; CHECK-NEXT: [[R:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %neg = sub i32 0, %x
+ %r = sub nsw i32 %y, %neg
+ ret i32 %r
+}
+
+define i32 @neg_nsw_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @neg_nsw_sub_nsw(
+; CHECK-NEXT: [[R:%.*]] = add nsw i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %neg = sub nsw i32 0, %x
+ %r = sub nsw i32 %y, %neg
+ ret i32 %r
+}
+
+define <2 x i32> @neg_sub_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_sub_vec(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub <2 x i32> zeroinitializer, %x
+ %r = sub <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_nsw_sub_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_nsw_sub_vec(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub nsw <2 x i32> zeroinitializer, %x
+ %r = sub <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_sub_nsw_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_sub_nsw_vec(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub <2 x i32> zeroinitializer, %x
+ %r = sub nsw <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_nsw_sub_nsw_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_nsw_sub_nsw_vec(
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub nsw <2 x i32> zeroinitializer, %x
+ %r = sub nsw <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_sub_vec_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_sub_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub <2 x i32> <i32 0, i32 undef>, %x
+ %r = sub <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_nsw_sub_vec_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_nsw_sub_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub nsw <2 x i32> <i32 undef, i32 0>, %x
+ %r = sub <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @neg_sub_nsw_vec_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_sub_nsw_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub <2 x i32> <i32 undef, i32 0>, %x
+ %r = sub nsw <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+; This should not drop 'nsw'.
+
+define <2 x i32> @neg_nsw_sub_nsw_vec_undef(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @neg_nsw_sub_nsw_vec_undef(
+; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %neg = sub nsw <2 x i32> <i32 0, i32 undef>, %x
+ %r = sub nsw <2 x i32> %y, %neg
+ ret <2 x i32> %r
+}
+
+; (~X) - (~Y) --> Y - X
+; Also, show that we can handle extra uses and vectors.
+
+declare void @use8(i8)
+
+define i8 @notnotsub(i8 %x, i8 %y) {
+; CHECK-LABEL: @notnotsub(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[Y]], [[X]]
+; CHECK-NEXT: call void @use8(i8 [[NX]])
+; CHECK-NEXT: call void @use8(i8 [[NY]])
+; CHECK-NEXT: ret i8 [[SUB]]
+;
+ %nx = xor i8 %x, -1
+ %ny = xor i8 %y, -1
+ %sub = sub i8 %nx, %ny
+ call void @use8(i8 %nx)
+ call void @use8(i8 %ny)
+ ret i8 %sub
+}
+
+define <2 x i8> @notnotsub_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @notnotsub_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SUB]]
+;
+ %nx = xor <2 x i8> %x, <i8 -1, i8 -1>
+ %ny = xor <2 x i8> %y, <i8 -1, i8 -1>
+ %sub = sub <2 x i8> %nx, %ny
+ ret <2 x i8> %sub
+}
+
+define <2 x i8> @notnotsub_vec_undef_elts(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @notnotsub_vec_undef_elts(
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SUB]]
+;
+ %nx = xor <2 x i8> %x, <i8 undef, i8 -1>
+ %ny = xor <2 x i8> %y, <i8 -1, i8 undef>
+ %sub = sub <2 x i8> %nx, %ny
+ ret <2 x i8> %sub
+}
+
+define i32 @test5(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[D1:%.*]] = sub i32 [[C:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[E:%.*]] = add i32 [[D1]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %D = sub i32 %B, %C
+ %E = sub i32 %A, %D
+ ret i32 %E
+}
+
+define i32 @test6(i32 %A, i32 %B) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT: [[D:%.*]] = and i32 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = and i32 %A, %B
+ %D = sub i32 %A, %C
+ ret i32 %D
+}
+
+define i32 @test6commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @test6commuted(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT: [[D:%.*]] = and i32 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = and i32 %B, %A
+ %D = sub i32 %A, %C
+ ret i32 %D
+}
+
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[B:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = sub i32 -1, %A
+ ret i32 %B
+}
+
+define i32 @test8(i32 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[A:%.*]], 3
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 9, %A
+ %C = sub i32 %B, %A
+ ret i32 %C
+}
+
+define i32 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = mul i32 [[A:%.*]], -2
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 3, %A
+ %C = sub i32 %A, %B
+ ret i32 %C
+}
+
+define i1 @test11(i8 %A, i8 %B) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[D:%.*]] = icmp ne i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %C = sub i8 %A, %B
+ %D = icmp ne i8 %C, 0
+ ret i1 %D
+}
+
+define <2 x i1> @test11vec(<2 x i8> %A, <2 x i8> %B) {
+; CHECK-LABEL: @test11vec(
+; CHECK-NEXT: [[D:%.*]] = icmp ne <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[D]]
+;
+ %C = sub <2 x i8> %A, %B
+ %D = icmp ne <2 x i8> %C, zeroinitializer
+ ret <2 x i1> %D
+}
+
+define i32 @test12(i32 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[C:%.*]] = lshr i32 [[A:%.*]], 31
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = ashr i32 %A, 31
+ %C = sub i32 0, %B
+ ret i32 %C
+}
+
+define i32 @test13(i32 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = lshr i32 %A, 31
+ %C = sub i32 0, %B
+ ret i32 %C
+}
+
+define <2 x i32> @test12vec(<2 x i32> %A) {
+; CHECK-LABEL: @test12vec(
+; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %B = ashr <2 x i32> %A, <i32 31, i32 31>
+ %C = sub <2 x i32> zeroinitializer, %B
+ ret <2 x i32> %C
+}
+
+define <2 x i32> @test13vec(<2 x i32> %A) {
+; CHECK-LABEL: @test13vec(
+; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %B = lshr <2 x i32> %A, <i32 31, i32 31>
+ %C = sub <2 x i32> zeroinitializer, %B
+ ret <2 x i32> %C
+}
+
+define i32 @test15(i32 %A, i32 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[C:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: [[D:%.*]] = srem i32 [[B:%.*]], [[C]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C = sub i32 0, %A
+ %D = srem i32 %B, %C
+ ret i32 %D
+}
+
+define i32 @test16(i32 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[A:%.*]], -1123
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %X = sdiv i32 %A, 1123
+ %Y = sub i32 0, %X
+ ret i32 %Y
+}
+
+; Can't fold the subtract here because the negation might overflow.
+; PR3142
+define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[B:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT: [[C:%.*]] = sdiv i32 [[B]], 1234
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sub i32 0, %A
+ %C = sdiv i32 %B, 1234
+ ret i32 %C
+}
+
+define i64 @test18(i64 %Y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i64 0
+;
+ %tmp.4 = shl i64 %Y, 2
+ %tmp.12 = shl i64 %Y, 2
+ %tmp.8 = sub i64 %tmp.4, %tmp.12
+ ret i64 %tmp.8
+}
+
+define i1 @test20(i32 %g, i32 %h) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0
+; CHECK-NEXT: ret i1 [[TMP_4]]
+;
+ %tmp.2 = sub i32 %g, %h
+ %tmp.4 = icmp ne i32 %tmp.2, %g
+ ret i1 %tmp.4
+}
+
+define i1 @test21(i32 %g, i32 %h) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0
+; CHECK-NEXT: ret i1 [[TMP_4]]
+;
+ %tmp.2 = sub i32 %g, %h
+ %tmp.4 = icmp ne i32 %tmp.2, %g
+ ret i1 %tmp.4
+}
+
+; PR2298
+define zeroext i1 @test22(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %tmp2 = sub i32 0, %a
+ %tmp4 = sub i32 0, %b
+ %tmp5 = icmp eq i32 %tmp2, %tmp4
+ ret i1 %tmp5
+}
+
+; rdar://7362831
+define i32 @test23(i8* %P, i64 %A){
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i32
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %B = getelementptr inbounds i8, i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %D = trunc i64 %C to i32
+ %E = ptrtoint i8* %P to i64
+ %F = trunc i64 %E to i32
+ %G = sub i32 %D, %F
+ ret i32 %G
+}
+
+define i8 @test23_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test23_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[A:%.*]] to i8
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
+ %C = ptrtoint i8 addrspace(1)* %B to i16
+ %D = trunc i16 %C to i8
+ %E = ptrtoint i8 addrspace(1)* %P to i16
+ %F = trunc i16 %E to i8
+ %G = sub i8 %D, %F
+ ret i8 %G
+}
+
+define i64 @test24(i8* %P, i64 %A){
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: ret i64 [[A:%.*]]
+;
+ %B = getelementptr inbounds i8, i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %E = ptrtoint i8* %P to i64
+ %G = sub i64 %C, %E
+ ret i64 %G
+}
+
+define i16 @test24_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test24_as1(
+; CHECK-NEXT: ret i16 [[A:%.*]]
+;
+ %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
+ %C = ptrtoint i8 addrspace(1)* %B to i16
+ %E = ptrtoint i8 addrspace(1)* %P to i16
+ %G = sub i16 %C, %E
+ ret i16 %G
+}
+
+define i64 @test24a(i8* %P, i64 %A){
+; CHECK-LABEL: @test24a(
+; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[A:%.*]]
+; CHECK-NEXT: ret i64 [[DIFF_NEG]]
+;
+ %B = getelementptr inbounds i8, i8* %P, i64 %A
+ %C = ptrtoint i8* %B to i64
+ %E = ptrtoint i8* %P to i64
+ %G = sub i64 %E, %C
+ ret i64 %G
+}
+
+define i16 @test24a_as1(i8 addrspace(1)* %P, i16 %A) {
+; CHECK-LABEL: @test24a_as1(
+; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i16 0, [[A:%.*]]
+; CHECK-NEXT: ret i16 [[DIFF_NEG]]
+;
+ %B = getelementptr inbounds i8, i8 addrspace(1)* %P, i16 %A
+ %C = ptrtoint i8 addrspace(1)* %B to i16
+ %E = ptrtoint i8 addrspace(1)* %P to i16
+ %G = sub i16 %E, %C
+ ret i16 %G
+}
+
+
+@Arr = external global [42 x i16]
+
+define i64 @test24b(i8* %P, i64 %A){
+; CHECK-LABEL: @test24b(
+; CHECK-NEXT: [[B_IDX:%.*]] = shl nuw i64 [[A:%.*]], 1
+; CHECK-NEXT: ret i64 [[B_IDX]]
+;
+ %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A
+ %C = ptrtoint i16* %B to i64
+ %G = sub i64 %C, ptrtoint ([42 x i16]* @Arr to i64)
+ ret i64 %G
+}
+
+
+define i64 @test25(i8* %P, i64 %A){
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[B_IDX:%.*]] = shl nuw i64 [[A:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[B_IDX]], -84
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A
+ %C = ptrtoint i16* %B to i64
+ %G = sub i64 %C, ptrtoint (i16* getelementptr ([42 x i16], [42 x i16]* @Arr, i64 1, i64 0) to i64)
+ ret i64 %G
+}
+
+@Arr_as1 = external addrspace(1) global [42 x i16]
+
+define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
+; CHECK-LABEL: @test25_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16
+; CHECK-NEXT: [[B_IDX:%.*]] = shl nuw i16 [[TMP1]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[B_IDX]], -84
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A
+ %C = ptrtoint i16 addrspace(1)* %B to i16
+ %G = sub i16 %C, ptrtoint (i16 addrspace(1)* getelementptr ([42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 1, i64 0) to i16)
+ ret i16 %G
+}
+
+define i32 @test26(i32 %x) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[NEG:%.*]] = shl i32 -3, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[NEG]]
+;
+ %shl = shl i32 3, %x
+ %neg = sub i32 0, %shl
+ ret i32 %neg
+}
+
+define i32 @test27(i32 %x, i32 %y) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[Y:%.*]], 3
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %mul = mul i32 %y, -8
+ %sub = sub i32 %x, %mul
+ ret i32 %sub
+}
+
+define <2 x i32> @test27vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vec(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 6>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> %y, <i32 -8, i32 -6>
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27vecsplat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vecsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> %y, <i32 -8, i32 -8>
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27vecmixed(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27vecmixed(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 -8>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> %y, <i32 -8, i32 8>
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define i32 @test27commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test27commuted(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[Y:%.*]], 3
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %mul = mul i32 -8, %y
+ %sub = sub i32 %x, %mul
+ ret i32 %sub
+}
+
+define <2 x i32> @test27commutedvec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvec(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 6>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> <i32 -8, i32 -6>, %y
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27commutedvecsplat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvecsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> <i32 -8, i32 -8>, %y
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define <2 x i32> @test27commutedvecmixed(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test27commutedvecmixed(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[Y:%.*]], <i32 8, i32 -8>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %mul = mul <2 x i32> <i32 -8, i32 8>, %y
+ %sub = sub <2 x i32> %x, %mul
+ ret <2 x i32> %sub
+}
+
+define i32 @test28(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %neg = sub i32 0, %z
+ %mul = mul i32 %neg, %y
+ %sub = sub i32 %x, %mul
+ ret i32 %sub
+}
+
+define i32 @test28commuted(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test28commuted(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %neg = sub i32 0, %z
+ %mul = mul i32 %y, %neg
+ %sub = sub i32 %x, %mul
+ ret i32 %sub
+}
+
+define i64 @test29(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ ret i64 %sub
+}
+
+define i64 @test30(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
+ %cast1 = ptrtoint i32* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ ret i64 %sub
+}
+
+define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test30_as1(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[I:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT: ret i16 [[TMP1]]
+;
+ %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+ %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i
+ %gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j
+ %cast1 = ptrtoint i32 addrspace(1)* %gep1 to i16
+ %cast2 = ptrtoint i8 addrspace(1)* %gep2 to i16
+ %sub = sub i16 %cast1, %cast2
+ ret i16 %sub
+}
+
+define <2 x i64> @test31(<2 x i64> %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i64> [[A:%.*]], <i64 3, i64 4>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %xor = xor <2 x i64> %A, <i64 -1, i64 -1>
+ %sub = sub <2 x i64> <i64 2, i64 3>, %xor
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @test32(<2 x i64> %A) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> <i64 3, i64 4>, [[A:%.*]]
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %add = add <2 x i64> %A, <i64 -1, i64 -1>
+ %sub = sub <2 x i64> <i64 2, i64 3>, %add
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @test35(<2 x i64> %A) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[SUB:%.*]] = mul <2 x i64> [[A:%.*]], <i64 -2, i64 -3>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %mul = mul <2 x i64> %A, <i64 3, i64 4>
+ %sub = sub <2 x i64> %A, %mul
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @test36(<2 x i64> %A) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[SUB:%.*]] = mul <2 x i64> [[A:%.*]], <i64 7, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %shl = shl <2 x i64> %A, <i64 3, i64 4>
+ %sub = sub <2 x i64> %shl, %A
+ ret <2 x i64> %sub
+}
+
+define <2 x i32> @test37(<2 x i32> %A) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[SUB]]
+;
+ %div = sdiv <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+ %sub = sub nsw <2 x i32> zeroinitializer, %div
+ ret <2 x i32> %sub
+}
+
+define i32 @test38(i32 %A) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[A:%.*]], -2147483648
+; CHECK-NEXT: [[SUB:%.*]] = sext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %div = sdiv i32 %A, -2147483648
+ %sub = sub nsw i32 0, %div
+ ret i32 %sub
+}
+
+define i16 @test40(i16 %a, i16 %b) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i16 [[A:%.*]], 1
+; CHECK-NEXT: [[ASHR1:%.*]] = ashr i16 [[B:%.*]], 1
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[ASHR]], [[ASHR1]]
+; CHECK-NEXT: ret i16 [[SUB]]
+;
+ %ashr = ashr i16 %a, 1
+ %ashr1 = ashr i16 %b, 1
+ %sub = sub i16 %ashr, %ashr1
+ ret i16 %sub
+}
+
+define i32 @test41(i16 %a, i16 %b) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[B:%.*]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %conv = sext i16 %a to i32
+ %conv1 = sext i16 %b to i32
+ %sub = sub i32 %conv, %conv1
+ ret i32 %sub
+}
+
+define i4 @test42(i4 %x, i4 %y) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[A:%.*]] = and i4 [[Y:%.*]], 7
+; CHECK-NEXT: [[B:%.*]] = and i4 [[X:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = sub nsw i4 [[A]], [[B]]
+; CHECK-NEXT: ret i4 [[C]]
+;
+ %a = and i4 %y, 7
+ %b = and i4 %x, 7
+ %c = sub i4 %a, %b
+ ret i4 %c
+}
+
+define i4 @test43(i4 %x, i4 %y) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[A:%.*]] = or i4 [[X:%.*]], -8
+; CHECK-NEXT: [[B:%.*]] = and i4 [[Y:%.*]], 7
+; CHECK-NEXT: [[C:%.*]] = sub nuw i4 [[A]], [[B]]
+; CHECK-NEXT: ret i4 [[C]]
+;
+ %a = or i4 %x, -8
+ %b = and i4 %y, 7
+ %c = sub i4 %a, %b
+ ret i4 %c
+}
+
+define i32 @test44(i32 %x) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X:%.*]], -32768
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %sub = sub nsw i32 %x, 32768
+ ret i32 %sub
+}
+
+define i32 @test45(i32 %x, i32 %y) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %or = or i32 %x, %y
+ %xor = xor i32 %x, %y
+ %sub = sub i32 %or, %xor
+ ret i32 %sub
+}
+
+define i32 @test45commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test45commuted(
+; CHECK-NEXT: [[SUB:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %or = or i32 %x, %y
+ %xor = xor i32 %y, %x
+ %sub = sub i32 %or, %xor
+ ret i32 %sub
+}
+
+define i32 @test46(i32 %x, i32 %y) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %or = or i32 %x, %y
+ %sub = sub i32 %or, %x
+ ret i32 %sub
+}
+
+define i32 @test46commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: @test46commuted(
+; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[SUB:%.*]] = and i32 [[X_NOT]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %or = or i32 %y, %x
+ %sub = sub i32 %or, %x
+ ret i32 %sub
+}
+
+define i32 @test47(i1 %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A:%.*]], i32 [[TMP1]], i32 0
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %sel0 = select i1 %A, i32 %D, i32 %B
+ %sel1 = select i1 %A, i32 %C, i32 %B
+ %sub = sub i32 %sel0, %sel1
+ ret i32 %sub
+}
+
+define i32 @test48(i1 %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A:%.*]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %sel0 = select i1 %A, i32 %B, i32 %D
+ %sel1 = select i1 %A, i32 %B, i32 %C
+ %sub = sub i32 %sel0, %sel1
+ ret i32 %sub
+}
+
+define i32 @test49(i32 %X) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[SUB]], 64
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %sub = sub i32 129, %X
+ %res = and i32 %sub, 64
+ ret i32 %res
+}
+
+define i32 @test50(i32 %X) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %sub = sub i32 129, %X
+ %res = and i32 %sub, 127
+ ret i32 %res
+}
+
+define i32 @test51(i32 %X) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 126, [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[SUB]], 64
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %sub = sub i32 254, %X
+ %res = and i32 %sub, 64
+ ret i32 %res
+}
+
+define i32 @test52(i32 %X) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 126, [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %sub = sub i32 254, %X
+ %res = and i32 %sub, 127
+ ret i32 %res
+}
+
+define <2 x i1> @test53(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[SUB:%.*]] = xor <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[SUB]]
+;
+ %sub = sub <2 x i1> %A, %B
+ ret <2 x i1> %sub
+}
+
+define i32 @test54(i1 %C) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 -877, i32 113
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = sub i32 123, %A
+ ret i32 %V
+}
+
+define <2 x i32> @test54vec(i1 %C) {
+; CHECK-LABEL: @test54vec(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 -877, i32 -877>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = sub <2 x i32> <i32 123, i32 123>, %A
+ ret <2 x i32> %V
+}
+
+define <2 x i32> @test54vec2(i1 %C) {
+; CHECK-LABEL: @test54vec2(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 -877, i32 -2167>, <2 x i32> <i32 113, i32 303>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+ %V = sub <2 x i32> <i32 123, i32 333>, %A
+ ret <2 x i32> %V
+}
+
+define i32 @test55(i1 %which) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = sub i32 123, %A
+ ret i32 %value
+}
+
+define <2 x i32> @test55vec(i1 %which) {
+; CHECK-LABEL: @test55vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = sub <2 x i32> <i32 123, i32 123>, %A
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test55vec2(i1 %which) {
+; CHECK-LABEL: @test55vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = sub <2 x i32> <i32 123, i32 333>, %A
+ ret <2 x i32> %value
+}
+
+define i32 @test56(i32 %A, i32 %B) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %X = add i32 %A, %B
+ %Y = sub i32 %A, %X
+ ret i32 %Y
+}
+
+define i32 @test57(i32 %A, i32 %B) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %X = add i32 %B, %A
+ %Y = sub i32 %A, %X
+ ret i32 %Y
+}
+
+@dummy_global1 = external global i8*
+@dummy_global2 = external global i8*
+
+define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
+; Note: the reassociate pass and another instcombine run will further optimize this to
+; "%sub = sub i64 %i, %j" followed by "ret i64 %sub".
+; gep1 and gep2 each have only one use.
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200
+; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]]
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i
+ %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ ret i64 %sub
+}
+
+define i64 @test59([100 x [100 x i8]]* %foo, i64 %i) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO:%.*]], i64 0, i64 42, i64 [[I:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO]], i64 0, i64 42, i64 0
+; CHECK-NEXT: store i8* [[GEP1]], i8** @dummy_global1, align 8
+; CHECK-NEXT: store i8* [[GEP2]], i8** @dummy_global2, align 8
+; CHECK-NEXT: ret i64 [[I]]
+;
+; gep1 and gep2 each have more than one use.
+ %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i
+ %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ store i8* %gep1, i8** @dummy_global1
+ store i8* %gep2, i8** @dummy_global2
+ ret i64 %sub
+}
+
+define i64 @test60([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO:%.*]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO]], i64 0, i64 42, i64 0
+; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint i8* [[GEP1]] to i64
+; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint i8* [[GEP2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]]
+; CHECK-NEXT: store i8* [[GEP1]], i8** @dummy_global1, align 8
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+; gep1 has a non-constant index and more than one use. The index arithmetic shouldn't be duplicated.
+ %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i
+ %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ store i8* %gep1, i8** @dummy_global1
+ ret i64 %sub
+}
+
+define i64 @test61([100 x [100 x i8]]* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO:%.*]], i64 0, i64 42, i64 0
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* [[FOO]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint i8* [[GEP1]] to i64
+; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint i8* [[GEP2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]]
+; CHECK-NEXT: store i8* [[GEP2]], i8** @dummy_global2, align 8
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+; gep2 has a non-constant index and more than one use. The index arithmetic shouldn't be duplicated.
+ %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0
+ %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ store i8* %gep2, i8** @dummy_global2
+ ret i64 %sub
+}
+
+define i32 @test62(i32 %A) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT: [[C:%.*]] = sub i32 2, [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sub i32 1, %A
+ %C = shl i32 %B, 1
+ ret i32 %C
+}
+
+define <2 x i32> @test62vec(<2 x i32> %A) {
+; CHECK-LABEL: @test62vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> <i32 2, i32 2>, [[B]]
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %B = sub <2 x i32> <i32 1, i32 1>, %A
+ %C = shl <2 x i32> %B, <i32 1, i32 1>
+ ret <2 x i32> %C
+}
+
+define i32 @test63(i32 %A) {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = sub i32 1, %A
+ %C = shl i32 %B, 1
+ %D = sub i32 2, %C
+ ret i32 %D
+}
+
+define <2 x i32> @test63vec(<2 x i32> %A) {
+; CHECK-LABEL: @test63vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %B = sub <2 x i32> <i32 1, i32 1>, %A
+ %C = shl <2 x i32> %B, <i32 1, i32 1>
+ %D = sub <2 x i32> <i32 2, i32 2>, %C
+ ret <2 x i32> %D
+}
+
+; FIXME: Transform (neg (max ~X, C)) -> ((min X, ~C) + 1). Same for min.
+define i32 @test64(i32 %x) {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %1 = xor i32 %x, -1
+ %2 = icmp sgt i32 %1, -256
+ %3 = select i1 %2, i32 %1, i32 -256
+ %res = sub i32 0, %3
+ ret i32 %res
+}
+
+define i32 @test65(i32 %x) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -256
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -256
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %1 = xor i32 %x, -1
+ %2 = icmp slt i32 %1, 255
+ %3 = select i1 %2, i32 %1, i32 255
+ %res = sub i32 0, %3
+ ret i32 %res
+}
+
+define i32 @test66(i32 %x) {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -101
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -101
+; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[TMP2]], 1
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %1 = xor i32 %x, -1
+ %2 = icmp ugt i32 %1, 100
+ %3 = select i1 %2, i32 %1, i32 100
+ %res = sub i32 0, %3
+ ret i32 %res
+}
+
+define i32 @test67(i32 %x) {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 100
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 100
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %1 = xor i32 %x, -1
+ %2 = icmp ult i32 %1, -101
+ %3 = select i1 %2, i32 %1, i32 -101
+ %res = sub i32 0, %3
+ ret i32 %res
+}
+
+; Check splat vectors too
+define <2 x i32> @test68(<2 x i32> %x) {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 255, i32 255>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 255, i32 255>
+; CHECK-NEXT: [[RES:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[RES]]
+;
+ %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %2 = icmp sgt <2 x i32> %1, <i32 -256, i32 -256>
+ %3 = select <2 x i1> %2, <2 x i32> %1, <2 x i32> <i32 -256, i32 -256>
+ %res = sub <2 x i32> zeroinitializer, %3
+ ret <2 x i32> %res
+}
+
+; And non-splat constant vectors.
+define <2 x i32> @test69(<2 x i32> %x) {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 255, i32 127>
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 255, i32 127>
+; CHECK-NEXT: [[RES:%.*]] = add <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[RES]]
+;
+ %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
+ %2 = icmp sgt <2 x i32> %1, <i32 -256, i32 -128>
+ %3 = select <2 x i1> %2, <2 x i32> %1, <2 x i32> <i32 -256, i32 -128>
+ %res = sub <2 x i32> zeroinitializer, %3
+ ret <2 x i32> %res
+}
+
+define i32 @nsw_inference1(i32 %x, i32 %y) {
+; CHECK-LABEL: @nsw_inference1(
+; CHECK-NEXT: [[X2:%.*]] = or i32 [[X:%.*]], 1024
+; CHECK-NEXT: [[Y2:%.*]] = and i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[Z:%.*]] = sub nuw nsw i32 [[X2]], [[Y2]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %x2 = or i32 %x, 1024
+ %y2 = and i32 %y, 1
+ %z = sub i32 %x2, %y2
+ ret i32 %z
+}
+
+define i32 @nsw_inference2(i32 %x, i32 %y) {
+; CHECK-LABEL: @nsw_inference2(
+; CHECK-NEXT: [[X2:%.*]] = and i32 [[X:%.*]], -1025
+; CHECK-NEXT: [[Y2:%.*]] = or i32 [[Y:%.*]], -2
+; CHECK-NEXT: [[Z:%.*]] = sub nsw i32 [[X2]], [[Y2]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %x2 = and i32 %x, -1025
+ %y2 = or i32 %y, -2
+ %z = sub i32 %x2, %y2
+ ret i32 %z
+}
diff --git a/llvm/test/Transforms/InstCombine/switch-constant-expr.ll b/llvm/test/Transforms/InstCombine/switch-constant-expr.ll
new file mode 100644
index 00000000000..c2ea83b2adb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/switch-constant-expr.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@g = global i32 0
+
+; PR30486
+define i32 @single_case() {
+; CHECK-LABEL: @single_case(
+; CHECK-NEXT: switch i32 ptrtoint (i32* @g to i32), label %x [
+; CHECK-NEXT: ]
+; CHECK: x:
+; CHECK-NEXT: ret i32 0
+;
+ switch i32 add (i32 ptrtoint (i32* @g to i32), i32 -1), label %x []
+x:
+ ret i32 0
+}
+
+define i32 @multiple_cases() {
+; CHECK-LABEL: @multiple_cases(
+; CHECK-NEXT: switch i32 ptrtoint (i32* @g to i32), label %x [
+; CHECK-NEXT: i32 2, label %one
+; CHECK-NEXT: i32 3, label %two
+; CHECK-NEXT: ]
+; CHECK: x:
+; CHECK-NEXT: ret i32 0
+; CHECK: one:
+; CHECK-NEXT: ret i32 1
+; CHECK: two:
+; CHECK-NEXT: ret i32 2
+;
+ switch i32 add (i32 ptrtoint (i32* @g to i32), i32 -1), label %x [
+ i32 1, label %one
+ i32 2, label %two
+ ]
+x:
+ ret i32 0
+
+one:
+ ret i32 1
+
+two:
+ ret i32 2
+}
diff --git a/llvm/test/Transforms/InstCombine/switch-truncate-crash.ll b/llvm/test/Transforms/InstCombine/switch-truncate-crash.ll
new file mode 100644
index 00000000000..cc3c1ff28ed
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/switch-truncate-crash.ll
@@ -0,0 +1,7 @@
+; RUN: opt -instcombine < %s
+
+define void @test() {
+ switch i32 0, label %out [i32 0, label %out]
+out:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/tan-nofastmath.ll b/llvm/test/Transforms/InstCombine/tan-nofastmath.ll
new file mode 100644
index 00000000000..0fe7b2c1d52
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/tan-nofastmath.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) {
+entry:
+ %call = call float @atanf(float %x)
+ %call1 = call float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: %call = call float @atanf(float %x)
+; CHECK-NEXT: %call1 = call float @tanf(float %call)
+; CHECK-NEXT: ret float %call1
+; CHECK-NEXT: }
+
+declare float @tanf(float)
+declare float @atanf(float)
diff --git a/llvm/test/Transforms/InstCombine/tan.ll b/llvm/test/Transforms/InstCombine/tan.ll
new file mode 100644
index 00000000000..6ea116839fe
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/tan.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) {
+ %call = call fast float @atanf(float %x)
+ %call1 = call fast float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: ret float %x
+
+define float @test2(float ()* %fptr) {
+ %call1 = call fast float %fptr()
+ %tan = call fast float @tanf(float %call1)
+ ret float %tan
+}
+
+; CHECK-LABEL: @test2
+; CHECK: tanf
+
+declare float @tanf(float)
+declare float @atanf(float)
+
diff --git a/llvm/test/Transforms/InstCombine/tbaa-store-to-load.ll b/llvm/test/Transforms/InstCombine/tbaa-store-to-load.ll
new file mode 100644
index 00000000000..696a1643a71
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/tbaa-store-to-load.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -instcombine < %s 2>&1 | FileCheck %s
+
+define i64 @f(i64* %p1, i64* %p2) {
+top:
+ ; Check that the TBAA metadata is preserved.
+ ; CHECK-LABEL: @f(
+ ; CHECK: %v1 = load i64, i64* %p1, align 8, !tbaa !0
+ ; CHECK: store i64 %v1, i64* %p2, align 8
+ ; CHECK: ret i64 %v1
+ %v1 = load i64, i64* %p1, align 8, !tbaa !0
+ store i64 %v1, i64* %p2, align 8
+ %v2 = load i64, i64* %p2, align 8
+ ret i64 %v2
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"scalar type", !2}
+!2 = !{!"load_tbaa"}
diff --git a/llvm/test/Transforms/InstCombine/toascii-1.ll b/llvm/test/Transforms/InstCombine/toascii-1.ll
new file mode 100644
index 00000000000..f5e18983e12
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/toascii-1.ll
@@ -0,0 +1,59 @@
+; Test that the toascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @toascii(i32)
+
+; Check toascii(c) -> c & 0x7f.
+
+define i32 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+ %ret = call i32 @toascii(i32 0)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+ %ret = call i32 @toascii(i32 1)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+ %ret = call i32 @toascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+ %ret = call i32 @toascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+ %ret = call i32 @toascii(i32 255)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+ %ret = call i32 @toascii(i32 256)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify7(i32 %x) {
+; CHECK-LABEL: @test_simplify7(
+ %ret = call i32 @toascii(i32 %x)
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 127
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[AND]]
+}
diff --git a/llvm/test/Transforms/InstCombine/token.ll b/llvm/test/Transforms/InstCombine/token.ll
new file mode 100644
index 00000000000..f96b85b4f22
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/token.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+declare i32 @__CxxFrameHandler3(...)
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ unreachable
+
+unreachable:
+ %cl = cleanuppad within none []
+ cleanupret from %cl unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: unreachable:
+; CHECK: %cl = cleanuppad within none []
+; CHECK: cleanupret from %cl unwind to caller
+
+define void @test2(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test2(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+
+define void @test3(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %cont2
+ unwind label %catch
+
+cont2:
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test3(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+
+declare void @foo()
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+define void @test4(i8 addrspace(1)* %obj) gc "statepoint-example" {
+bb:
+ unreachable
+
+unreachable:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: define void @test4(
+; CHECK: unreachable:
+; CHECK: call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; CHECK: ret void
+
+
+declare void @g(i32)
diff --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
new file mode 100644
index 00000000000..40d58f31458
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
@@ -0,0 +1,317 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i16 @narrow_sext_and(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_and(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = and i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_and(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_and(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = and i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_sext_or(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_or(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = or i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_or(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_or(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = or i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_sext_xor(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = xor i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_xor(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = xor i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_sext_add(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_add(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = add i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_add(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_add(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = add i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_sext_sub(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = sub i16 %x16, [[TMP1]]
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = sub i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_sub(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = sub i16 %x16, [[TMP1]]
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = sub i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_sext_mul(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_sext_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = sext i16 %x16 to i32
+ %b = mul i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+define i16 @narrow_zext_mul(i16 %x16, i32 %y32) {
+; CHECK-LABEL: @narrow_zext_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
+; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %x32 = zext i16 %x16 to i32
+ %b = mul i32 %x32, %y32
+ %r = trunc i32 %b to i16
+ ret i16 %r
+}
+
+; Verify that the commuted patterns work. The div is to ensure that complexity-based
+; canonicalization doesn't swap the binop operands. Use vector types to show those work too.
+
+define <2 x i16> @narrow_sext_and_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_and_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = and <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_and_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_and_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = and <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_sext_or_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_or_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = or <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_or_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_or_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = or <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_sext_xor_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_xor_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = xor <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_xor_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_xor_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = xor <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_sext_add_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_add_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = add <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_add_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_add_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = add <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_sext_sub_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_sub_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = sub <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_sub_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_sub_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = sub <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = sub <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_sext_mul_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_sext_mul_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = sext <2 x i16> %x16 to <2 x i32>
+ %b = mul <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @narrow_zext_mul_commute(<2 x i16> %x16, <2 x i32> %y32) {
+; CHECK-LABEL: @narrow_zext_mul_commute(
+; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x i16>
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
+ %x32 = zext <2 x i16> %x16 to <2 x i32>
+ %b = mul <2 x i32> %y32op0, %x32
+ %r = trunc <2 x i32> %b to <2 x i16>
+ ret <2 x i16> %r
+}
+
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
new file mode 100644
index 00000000000..01d53ab9840
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -0,0 +1,626 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Instcombine should be able to eliminate all of these ext casts.
+
+declare void @use(i32)
+
+define i64 @test1(i64 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32
+; CHECK-NEXT: [[C:%.*]] = and i64 %a, 15
+; CHECK-NEXT: call void @use(i32 [[B]])
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 15
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+
+define i64 @test2(i64 %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32
+; CHECK-NEXT: [[D1:%.*]] = shl i64 %a, 36
+; CHECK-NEXT: [[D:%.*]] = ashr exact i64 [[D1]], 36
+; CHECK-NEXT: call void @use(i32 [[B]])
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %b = trunc i64 %a to i32
+ %c = shl i32 %b, 4
+ %q = ashr i32 %c, 4
+ %d = sext i32 %q to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+
+define i64 @test3(i64 %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32
+; CHECK-NEXT: [[C:%.*]] = and i64 %a, 8
+; CHECK-NEXT: call void @use(i32 [[B]])
+; CHECK-NEXT: ret i64 [[C]]
+;
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+
+define i64 @test4(i64 %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %a to i32
+; CHECK-NEXT: [[C:%.*]] = and i64 %a, 8
+; CHECK-NEXT: [[X:%.*]] = xor i64 [[C]], 8
+; CHECK-NEXT: call void @use(i32 [[B]])
+; CHECK-NEXT: ret i64 [[X]]
+;
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %x = xor i32 %c, 8
+ %d = zext i32 %x to i64
+ call void @use(i32 %b)
+ ret i64 %d
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[C:%.*]] = lshr i32 %A, 16
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = zext i32 %A to i128
+ %C = lshr i128 %B, 16
+ %D = trunc i128 %C to i32
+ ret i32 %D
+}
+
+define i32 @test6(i64 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 32
+; CHECK-NEXT: [[D:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = zext i64 %A to i128
+ %C = lshr i128 %B, 32
+ %D = trunc i128 %C to i32
+ ret i32 %D
+}
+
+; Test case where the 'ashr' demanded bits do not include any of the high bits,
+; but do include sign bits, and the sign bit is not known to be zero.
+define i16 @ashr_mul_sign_bits(i8 %X, i8 %Y) {
+; CHECK-LABEL: @ashr_mul_sign_bits(
+; CHECK-NEXT: [[A:%.*]] = sext i8 %X to i16
+; CHECK-NEXT: [[B:%.*]] = sext i8 %Y to i16
+; CHECK-NEXT: [[C:%.*]] = mul nsw i16 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = ashr i16 [[C]], 3
+; CHECK-NEXT: ret i16 [[D]]
+ %A = sext i8 %X to i32
+ %B = sext i8 %Y to i32
+ %C = mul i32 %A, %B
+ %D = ashr i32 %C, 3
+ %E = trunc i32 %D to i16
+ ret i16 %E
+}
+
+define i16 @ashr_mul(i8 %X, i8 %Y) {
+; CHECK-LABEL: @ashr_mul(
+; CHECK-NEXT: [[A:%.*]] = sext i8 %X to i16
+; CHECK-NEXT: [[B:%.*]] = sext i8 %Y to i16
+; CHECK-NEXT: [[C:%.*]] = mul nsw i16 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = ashr i16 [[C]], 8
+; CHECK-NEXT: ret i16 [[D]]
+ %A = sext i8 %X to i20
+ %B = sext i8 %Y to i20
+ %C = mul i20 %A, %B
+ %D = ashr i20 %C, 8
+ %E = trunc i20 %D to i16
+ ret i16 %E
+}
+
+define i32 @trunc_ashr(i32 %X) {
+; CHECK-LABEL: @trunc_ashr(
+; CHECK-NEXT: [[B:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[C:%.*]] = ashr i32 [[B]], 8
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %A = zext i32 %X to i36
+ %B = or i36 %A, -2147483648 ; 0xF80000000
+ %C = ashr i36 %B, 8
+ %T = trunc i36 %C to i32
+ ret i32 %T
+}
+
+define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) {
+; CHECK-LABEL: @trunc_ashr_vec(
+; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[B]], <i32 8, i32 8>
+; CHECK-NEXT: ret <2 x i32> [[C]]
+;
+ %A = zext <2 x i32> %X to <2 x i36>
+ %B = or <2 x i36> %A, <i36 -2147483648, i36 -2147483648> ; 0xF80000000
+ %C = ashr <2 x i36> %B, <i36 8, i36 8>
+ %T = trunc <2 x i36> %C to <2 x i32>
+ ret <2 x i32> %T
+}
+
+define i92 @test7(i64 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 %A, 32
+; CHECK-NEXT: [[D:%.*]] = zext i64 [[TMP1]] to i92
+; CHECK-NEXT: ret i92 [[D]]
+;
+ %B = zext i64 %A to i128
+ %C = lshr i128 %B, 32
+ %D = trunc i128 %C to i92
+ ret i92 %D
+}
+
+define i64 @test8(i32 %A, i32 %B) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP38:%.*]] = zext i32 %A to i64
+; CHECK-NEXT: [[TMP32:%.*]] = zext i32 %B to i64
+; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 32
+; CHECK-NEXT: [[INS35:%.*]] = or i64 [[TMP33]], [[TMP38]]
+; CHECK-NEXT: ret i64 [[INS35]]
+;
+ %tmp38 = zext i32 %A to i128
+ %tmp32 = zext i32 %B to i128
+ %tmp33 = shl i128 %tmp32, 32
+ %ins35 = or i128 %tmp33, %tmp38
+ %tmp42 = trunc i128 %ins35 to i64
+ ret i64 %tmp42
+}
+
+define i8 @test9(i32 %X) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %X to i8
+; CHECK-NEXT: [[Z:%.*]] = and i8 [[TMP1]], 42
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %Y = and i32 %X, 42
+ %Z = trunc i32 %Y to i8
+ ret i8 %Z
+}
+
+; rdar://8808586
+define i8 @test10(i32 %X) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[Y:%.*]] = trunc i32 %X to i8
+; CHECK-NEXT: [[Z:%.*]] = and i8 [[Y]], 42
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %Y = trunc i32 %X to i8
+ %Z = and i8 %Y, 42
+ ret i8 %Z
+}
+
+; PR25543
+; https://llvm.org/bugs/show_bug.cgi?id=25543
+; This is an extractelement.
+
+define i32 @trunc_bitcast1(<4 x i32> %v) {
+; CHECK-LABEL: @trunc_bitcast1(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> %v, i32 1
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %bc = bitcast <4 x i32> %v to i128
+ %shr = lshr i128 %bc, 32
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+}
+
+; A bitcast may still be required.
+
+define i32 @trunc_bitcast2(<2 x i64> %v) {
+; CHECK-LABEL: @trunc_bitcast2(
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[BC1]], i32 2
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %bc = bitcast <2 x i64> %v to i128
+ %shr = lshr i128 %bc, 64
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+}
+
+; The right shift is optional.
+
+define i32 @trunc_bitcast3(<4 x i32> %v) {
+; CHECK-LABEL: @trunc_bitcast3(
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> %v, i32 0
+; CHECK-NEXT: ret i32 [[EXT]]
+;
+ %bc = bitcast <4 x i32> %v to i128
+ %ext = trunc i128 %bc to i32
+ ret i32 %ext
+}
+
+define i32 @trunc_shl_31_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_31_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl i64 %val, 31
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_nsw_31_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_nsw_31_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl nsw i64 %val, 31
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_nuw_31_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_nuw_31_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl nuw i64 %val, 31
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_nsw_nuw_31_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_nsw_nuw_31_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 31
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl nsw nuw i64 %val, 31
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i16 @trunc_shl_15_i16_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_15_i16_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i16
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i16 [[VAL_TR]], 15
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %shl = shl i64 %val, 15
+ %trunc = trunc i64 %shl to i16
+ ret i16 %trunc
+}
+
+define i16 @trunc_shl_15_i16_i32(i32 %val) {
+; CHECK-LABEL: @trunc_shl_15_i16_i32(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i32 %val to i16
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i16 [[VAL_TR]], 15
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %shl = shl i32 %val, 15
+ %trunc = trunc i32 %shl to i16
+ ret i16 %trunc
+}
+
+define i8 @trunc_shl_7_i8_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_7_i8_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i8
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i8 [[VAL_TR]], 7
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
+ %shl = shl i64 %val, 7
+ %trunc = trunc i64 %shl to i8
+ ret i8 %trunc
+}
+
+define i2 @trunc_shl_1_i2_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_1_i2_i64(
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 %val, 1
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHL]] to i2
+; CHECK-NEXT: ret i2 [[TRUNC]]
+;
+ %shl = shl i64 %val, 1
+ %trunc = trunc i64 %shl to i2
+ ret i2 %trunc
+}
+
+define i32 @trunc_shl_1_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_1_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 1
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl i64 %val, 1
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_16_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_16_i32_i64(
+; CHECK-NEXT: [[VAL_TR:%.*]] = trunc i64 %val to i32
+; CHECK-NEXT: [[TRUNC:%.*]] = shl i32 [[VAL_TR]], 16
+; CHECK-NEXT: ret i32 [[TRUNC]]
+;
+ %shl = shl i64 %val, 16
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_33_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_33_i32_i64(
+; CHECK-NEXT: ret i32 0
+;
+ %shl = shl i64 %val, 33
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+define i32 @trunc_shl_32_i32_i64(i64 %val) {
+; CHECK-LABEL: @trunc_shl_32_i32_i64(
+; CHECK-NEXT: ret i32 0
+;
+ %shl = shl i64 %val, 32
+ %trunc = trunc i64 %shl to i32
+ ret i32 %trunc
+}
+
+; TODO: Should be able to handle vectors
+define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) {
+; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> %val, <i64 16, i64 16>
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
+;
+ %shl = shl <2 x i64> %val, <i64 16, i64 16>
+ %trunc = trunc <2 x i64> %shl to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define <2 x i32> @trunc_shl_nosplat_v2i32_v2i64(<2 x i64> %val) {
+; CHECK-LABEL: @trunc_shl_nosplat_v2i32_v2i64(
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i64> %val, <i64 15, i64 16>
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TRUNC]]
+;
+ %shl = shl <2 x i64> %val, <i64 15, i64 16>
+ %trunc = trunc <2 x i64> %shl to <2 x i32>
+ ret <2 x i32> %trunc
+}
+
+define void @trunc_shl_31_i32_i64_multi_use(i64 %val, i32 addrspace(1)* %ptr0, i64 addrspace(1)* %ptr1) {
+; CHECK-LABEL: @trunc_shl_31_i32_i64_multi_use(
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 %val, 31
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHL]] to i32
+; CHECK-NEXT: store volatile i32 [[TRUNC]], i32 addrspace(1)* %ptr0, align 4
+; CHECK-NEXT: store volatile i64 [[SHL]], i64 addrspace(1)* %ptr1, align 8
+; CHECK-NEXT: ret void
+;
+ %shl = shl i64 %val, 31
+ %trunc = trunc i64 %shl to i32
+ store volatile i32 %trunc, i32 addrspace(1)* %ptr0
+ store volatile i64 %shl, i64 addrspace(1)* %ptr1
+ ret void
+}
+
+define i32 @trunc_shl_lshr_infloop(i64 %arg) {
+; CHECK-LABEL: @trunc_shl_lshr_infloop(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 %arg, 1
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = lshr i64 %arg, 1
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @trunc_shl_ashr_infloop(i64 %arg) {
+; CHECK-LABEL: @trunc_shl_ashr_infloop(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 %arg, 3
+; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = ashr i64 %arg, 3
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @trunc_shl_shl_infloop(i64 %arg) {
+; CHECK-LABEL: @trunc_shl_shl_infloop(
+; CHECK-NEXT: [[ARG_TR:%.*]] = trunc i64 %arg to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[ARG_TR]], 3
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = shl i64 %arg, 1
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @trunc_shl_lshr_var(i64 %arg, i64 %val) {
+; CHECK-LABEL: @trunc_shl_lshr_var(
+; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 %arg, %val
+; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = lshr i64 %arg, %val
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @trunc_shl_ashr_var(i64 %arg, i64 %val) {
+; CHECK-LABEL: @trunc_shl_ashr_var(
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 %arg, %val
+; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = ashr i64 %arg, %val
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
+; CHECK-LABEL: @trunc_shl_shl_var(
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 %arg, %val
+; CHECK-NEXT: [[TMP0_TR:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0_TR]], 2
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %tmp0 = shl i64 %arg, %val
+ %tmp1 = shl i64 %tmp0, 2
+ %tmp2 = trunc i64 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) {
+; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15(
+; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[CONV]]
+;
+ %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %conv = trunc <8 x i32> %shl to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+define <8 x i16> @trunc_shl_v8i16_v8i32_16(<8 x i32> %a) {
+; CHECK-LABEL: @trunc_shl_v8i16_v8i32_16(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %shl = shl <8 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %conv = trunc <8 x i32> %shl to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) {
+; CHECK-LABEL: @trunc_shl_v8i16_v8i32_17(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
+ %shl = shl <8 x i32> %a, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+ %conv = trunc <8 x i32> %shl to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
+; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4(
+; CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[CONV]]
+;
+ %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ %conv = trunc <8 x i32> %shl to <8 x i16>
+ ret <8 x i16> %conv
+}
+
+; Although the mask would stay the same, we don't create a shuffle with a narrow vector type that the backend may not be able to handle:
+; trunc (shuffle X, C, Mask) --> shuffle (trunc X), C', Mask
+
+define <4 x i8> @wide_shuf(<4 x i32> %x) {
+; CHECK-LABEL: @wide_shuf(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %x, <4 x i32> <i32 undef, i32 3634, i32 90, i32 undef>, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <4 x i32> [[SHUF]] to <4 x i8>
+; CHECK-NEXT: ret <4 x i8> [[TRUNC]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> <i32 35, i32 3634, i32 90, i32 -1>, <4 x i32> <i32 1, i32 5, i32 6, i32 2>
+ %trunc = trunc <4 x i32> %shuf to <4 x i8>
+ ret <4 x i8> %trunc
+}
+
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+
+define <4 x i8> @wide_splat1(<4 x i32> %x) {
+; CHECK-LABEL: @wide_splat1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> %x to <4 x i8>
+; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i8> [[TRUNC]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ %trunc = trunc <4 x i32> %shuf to <4 x i8>
+ ret <4 x i8> %trunc
+}
+
+; Test weird types.
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+
+define <3 x i31> @wide_splat2(<3 x i33> %x) {
+; CHECK-LABEL: @wide_splat2(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <3 x i33> %x to <3 x i31>
+; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <3 x i31> [[TMP1]], <3 x i31> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <3 x i31> [[TRUNC]]
+;
+ %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 1, i32 1, i32 1>
+ %trunc = trunc <3 x i33> %shuf to <3 x i31>
+ ret <3 x i31> %trunc
+}
+
+; FIXME:
+; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask
+; A mask with undef elements should still be considered a splat mask.
+
+define <3 x i31> @wide_splat3(<3 x i33> %x) {
+; CHECK-LABEL: @wide_splat3(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 undef, i32 1, i32 1>
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31>
+; CHECK-NEXT: ret <3 x i31> [[TRUNC]]
+;
+ %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 undef, i32 1, i32 1>
+ %trunc = trunc <3 x i33> %shuf to <3 x i31>
+ ret <3 x i31> %trunc
+}
+
+; TODO: The shuffle extends the length of the input vector. Should we shrink this?
+
+define <8 x i8> @wide_lengthening_splat(<4 x i16> %v) {
+; CHECK-LABEL: @wide_lengthening_splat(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TR:%.*]] = trunc <8 x i16> [[SHUF]] to <8 x i8>
+; CHECK-NEXT: ret <8 x i8> [[TR]]
+;
+ %shuf = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer
+ %tr = trunc <8 x i16> %shuf to <8 x i8>
+ ret <8 x i8> %tr
+}
+
+define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
+; CHECK-LABEL: @narrow_add_vec_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = add <2 x i8> [[TMP1]], <i8 0, i8 127>
+; CHECK-NEXT: ret <2 x i8> [[TR]]
+;
+ %add = add <2 x i32> %x, <i32 256, i32 -129>
+ %tr = trunc <2 x i32> %add to <2 x i8>
+ ret <2 x i8> %tr
+}
+
+define <2 x i8> @narrow_mul_vec_constant(<2 x i32> %x) {
+; CHECK-LABEL: @narrow_mul_vec_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = mul <2 x i8> [[TMP1]], <i8 0, i8 127>
+; CHECK-NEXT: ret <2 x i8> [[TR]]
+;
+ %add = mul <2 x i32> %x, <i32 256, i32 -129>
+ %tr = trunc <2 x i32> %add to <2 x i8>
+ ret <2 x i8> %tr
+}
+
+define <2 x i8> @narrow_sub_vec_constant(<2 x i32> %x) {
+; CHECK-LABEL: @narrow_sub_vec_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = sub <2 x i8> <i8 0, i8 127>, [[TMP1]]
+; CHECK-NEXT: ret <2 x i8> [[TR]]
+;
+ %sub = sub <2 x i32> <i32 256, i32 -129>, %x
+ %tr = trunc <2 x i32> %sub to <2 x i8>
+ ret <2 x i8> %tr
+}
+
diff --git a/llvm/test/Transforms/InstCombine/type_pun.ll b/llvm/test/Transforms/InstCombine/type_pun.ll
new file mode 100644
index 00000000000..56d1ffcb5d3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/type_pun.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Ensure that type punning using a union of vector and same-sized array
+; generates an extract instead of a shuffle with an uncommon vector size:
+;
+; typedef uint32_t v4i32 __attribute__((vector_size(16)));
+; union { v4i32 v; uint32_t a[4]; };
+;
+; This cleans up behind SROA, which inserts the uncommon vector size when
+; cleaning up the alloca/store/GEP/load.
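+;
+; A hypothetical C source function that would produce this pattern after SROA
+; (an illustrative sketch, not taken from the original test):
+;
+;   uint32_t first_lane(v4i32 v) {
+;     union { v4i32 v; uint32_t a[4]; } u;
+;     u.v = v;           // store the vector into the union
+;     return u.a[0];     // read it back as the first i32 array element
+;   }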
+
+
+; Provide legal integer types.
+target datalayout = "p:32:32"
+
+
+; Extracting the zeroth element in an i32 array.
+define i32 @type_pun_zeroth(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_zeroth(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: ret i32 [[SROA_EXTRACT]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = bitcast <4 x i8> %sroa to i32
+ ret i32 %1
+}
+
+; Extracting the first element in an i32 array.
+define i32 @type_pun_first(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_first(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 1
+; CHECK-NEXT: ret i32 [[SROA_EXTRACT]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %1 = bitcast <4 x i8> %sroa to i32
+ ret i32 %1
+}
+
+; Extracting an i32 that isn't aligned to any natural boundary.
+define i32 @type_pun_misaligned(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_misaligned(
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32>
+; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
+ %1 = bitcast <4 x i8> %sroa to i32
+ ret i32 %1
+}
+
+; Type punning to an array of pointers.
+define i32* @type_pun_pointer(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_pointer(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[SROA_EXTRACT]] to i32*
+; CHECK-NEXT: ret i32* [[TMP1]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = bitcast <4 x i8> %sroa to i32
+ %2 = inttoptr i32 %1 to i32*
+ ret i32* %2
+}
+
+; Type punning to an array of 32-bit floating-point values.
+define float @type_pun_float(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_float(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x float>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x float> [[SROA_BC]], i32 0
+; CHECK-NEXT: ret float [[SROA_EXTRACT]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = bitcast <4 x i8> %sroa to float
+ ret float %1
+}
+
+; Type punning to an array of 64-bit floating-point values.
+define double @type_pun_double(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_double(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <2 x double>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <2 x double> [[SROA_BC]], i32 0
+; CHECK-NEXT: ret double [[SROA_EXTRACT]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = bitcast <8 x i8> %sroa to double
+ ret double %1
+}
+
+; Type punning to same-size floating-point and integer values.
+; Verify that multiple uses with different bitcast types are properly handled.
+define { float, i32 } @type_pun_float_i32(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_float_i32(
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: [[SROA_BC1:%.*]] = bitcast <16 x i8> [[IN]] to <4 x float>
+; CHECK-NEXT: [[SROA_EXTRACT2:%.*]] = extractelement <4 x float> [[SROA_BC1]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { float, i32 } undef, float [[SROA_EXTRACT2]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { float, i32 } [[TMP1]], i32 [[SROA_EXTRACT]], 1
+; CHECK-NEXT: ret { float, i32 } [[TMP2]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %f = bitcast <4 x i8> %sroa to float
+ %i = bitcast <4 x i8> %sroa to i32
+ %1 = insertvalue { float, i32 } undef, float %f, 0
+ %2 = insertvalue { float, i32 } %1, i32 %i, 1
+ ret { float, i32 } %2
+}
+
+; Type punning two i32 values, with control flow.
+; Verify that the bitcast is shared and dominates usage.
+define i32 @type_pun_i32_ctrl(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_i32_ctrl(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[IN:%.*]] to <4 x i32>
+; CHECK-NEXT: br i1 undef, label [[LEFT:%.*]], label [[RIGHT:%.*]]
+; CHECK: left:
+; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: br label [[TAIL:%.*]]
+; CHECK: right:
+; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = extractelement <4 x i32> [[SROA_BC]], i32 0
+; CHECK-NEXT: br label [[TAIL]]
+; CHECK: tail:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[SROA_EXTRACT1]], [[LEFT]] ], [ [[SROA_EXTRACT]], [[RIGHT]] ]
+; CHECK-NEXT: ret i32 [[I]]
+;
+entry:
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ br i1 undef, label %left, label %right
+left:
+ %lhs = bitcast <4 x i8> %sroa to i32
+ br label %tail
+right:
+ %rhs = bitcast <4 x i8> %sroa to i32
+ br label %tail
+tail:
+ %i = phi i32 [ %lhs, %left ], [ %rhs, %right ]
+ ret i32 %i
+}
+
+; Extracting a type that won't fit in a vector isn't handled. The function
+; should stay the same.
+define i40 @type_pun_unhandled(<16 x i8> %in) {
+; CHECK-LABEL: @type_pun_unhandled(
+; CHECK-NEXT: [[SROA:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <5 x i8> [[SROA]] to i40
+; CHECK-NEXT: ret i40 [[TMP1]]
+;
+ %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <5 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8>
+ %1 = bitcast <5 x i8> %sroa to i40
+ ret i40 %1
+}
diff --git a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
new file mode 100644
index 00000000000..b306cb619f8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
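+; These tests check that a constant 'add nuw' is folded into a following
+; uadd.with.overflow call by summing the constants, e.g. (%x + 7) followed by
+; uadd.with.overflow(%a, 13) becomes uadd.with.overflow(%x, 20). The fold only
+; applies when the two constants themselves add without overflowing
+; (descriptive summary inferred from the tests below).
+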
+declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>)
+
+declare { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8>, <2 x i8>)
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8)
+
+define { i32, i1 } @simple_fold(i32 %x) {
+; CHECK-LABEL: @simple_fold(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 20)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nuw i32 %x, 7
+ %b = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 13)
+ ret { i32, i1 } %b
+}
+
+define { i8, i1 } @fold_on_constant_add_no_overflow(i8 %x) {
+; CHECK-LABEL: @fold_on_constant_add_no_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X:%.*]], i8 -1)
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %a = add nuw i8 %x, 200
+ %b = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 55)
+ ret { i8, i1 } %b
+}
+
+define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) {
+; CHECK-LABEL: @no_fold_on_constant_add_overflow(
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[X:%.*]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %a = add nuw i8 %x, 200
+ %b = tail call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 56)
+ ret { i8, i1 } %b
+}
+
+define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) {
+; CHECK-LABEL: @no_fold_vector_no_overflow(
+; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 -57, i8 -56>
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> <i8 55, i8 55>)
+; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]]
+;
+ %a = add nuw <2 x i8> %x, <i8 199, i8 200>
+ %b = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> %a, <2 x i8> <i8 55, i8 55>)
+ ret { <2 x i8>, <2 x i1> } %b
+}
+
+define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) {
+; CHECK-LABEL: @no_fold_vector_overflow(
+; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 -56, i8 -55>
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> <i8 55, i8 55>)
+; CHECK-NEXT: ret { <2 x i8>, <2 x i1> } [[B]]
+;
+ %a = add nuw <2 x i8> %x, <i8 200, i8 201>
+ %b = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> %a, <2 x i8> <i8 55, i8 55>)
+ ret { <2 x i8>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @fold_simple_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[TMP1]]
+;
+ %a = add nuw <2 x i32> %x, <i32 12, i32 12>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
+; CHECK-LABEL: @no_fold_splat_undef_constant(
+; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], <i32 12, i32 undef>
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]]
+;
+ %a = add nuw <2 x i32> %x, <i32 12, i32 undef>
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @no_fold_splat_not_constant(
+; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT: ret { <2 x i32>, <2 x i1> } [[B]]
+;
+ %a = add nuw <2 x i32> %x, %y
+ %b = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 30, i32 30>)
+ ret { <2 x i32>, <2 x i1> } %b
+}
+
+define { i32, i1 } @fold_nuwnsw(i32 %x) {
+; CHECK-LABEL: @fold_nuwnsw(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %a = add nuw nsw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_nsw(i32 %x) {
+; CHECK-LABEL: @no_fold_nsw(
+; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[B:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A]], i32 30)
+; CHECK-NEXT: ret { i32, i1 } [[B]]
+;
+ %a = add nsw i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 30)
+ ret { i32, i1 } %b
+}
+
+define { i32, i1 } @no_fold_wrapped_add(i32 %x) {
+; CHECK-LABEL: @no_fold_wrapped_add(
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 12
+; CHECK-NEXT: [[B:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A]], i32 30)
+; CHECK-NEXT: ret { i32, i1 } [[B]]
+;
+ %a = add i32 %x, 12
+ %b = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 30, i32 %a)
+ ret { i32, i1 } %b
+}
diff --git a/llvm/test/Transforms/InstCombine/uaddo.ll b/llvm/test/Transforms/InstCombine/uaddo.ll
new file mode 100644
index 00000000000..be8e0e67588
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/uaddo.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
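+; In these tests, %noty = ~y, and the compare 'x ugt ~y' (equivalently
+; '~y ult x') is the usual unsigned-add-overflow check: it holds exactly when
+; x + y wraps. The commute tests cover operand and predicate commutations of
+; that pattern; the wrong_pred tests use predicates for which the pattern does
+; not match (descriptive note inferred from the tests below).
+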
+define i32 @uaddo_commute1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute1(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp ugt i32 %x, %noty
+ %r = select i1 %c, i32 %z, i32 %a
+ ret i32 %r
+}
+
+define <2 x i32> @uaddo_commute2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+; CHECK-LABEL: @uaddo_commute2(
+; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[Z:%.*]], <2 x i32> [[A]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %noty = xor <2 x i32> %y, <i32 -1, i32 -1>
+ %a = add <2 x i32> %y, %x
+ %c = icmp ugt <2 x i32> %x, %noty
+ %r = select <2 x i1> %c, <2 x i32> %z, <2 x i32> %a
+ ret <2 x i32> %r
+}
+
+define i32 @uaddo_commute3(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute3(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp ult i32 %noty, %x
+ %r = select i1 %c, i32 %z, i32 %a
+ ret i32 %r
+}
+
+define i32 @uaddo_commute4(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute4(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %y, %x
+ %c = icmp ult i32 %noty, %x
+ %r = select i1 %c, i32 %z, i32 %a
+ ret i32 %r
+}
+
+define i32 @uaddo_commute5(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute5(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp ugt i32 %x, %noty
+ %r = select i1 %c, i32 %a, i32 %z
+ ret i32 %r
+}
+
+define i32 @uaddo_commute6(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute6(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %y, %x
+ %c = icmp ugt i32 %x, %noty
+ %r = select i1 %c, i32 %a, i32 %z
+ ret i32 %r
+}
+
+define i32 @uaddo_commute7(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute7(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp ult i32 %noty, %x
+ %r = select i1 %c, i32 %a, i32 %z
+ ret i32 %r
+}
+
+define i32 @uaddo_commute8(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_commute8(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %y, %x
+ %c = icmp ult i32 %noty, %x
+ %r = select i1 %c, i32 %a, i32 %z
+ ret i32 %r
+}
+
+define i32 @uaddo_wrong_pred1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_wrong_pred1(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[Z:%.*]], i32 [[A]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp ult i32 %x, %noty
+ %r = select i1 %c, i32 %z, i32 %a
+ ret i32 %r
+}
+
+define i32 @uaddo_wrong_pred2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @uaddo_wrong_pred2(
+; CHECK-NEXT: [[NOTY:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[NOTY]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 [[Z:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %noty = xor i32 %y, -1
+ %a = add i32 %x, %y
+ %c = icmp uge i32 %x, %noty
+ %r = select i1 %c, i32 %z, i32 %a
+ ret i32 %r
+}
+
+; icmp canonicalization should be consistent for these cases.
+; Either the compare depends on the sum or not.
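+; For example (illustrative): with %a = %x + 1, the check '%a ult 1' can only
+; hold when %a == 0, i.e. when the add wrapped, so it canonicalizes to an
+; equality test on the sum. With %a = %x + (-1), the check '%a != -1' holds
+; exactly when %x != 0, so the canonical form compares %x instead of the sum.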
+
+define i1 @uaddo_1(i8 %x, i8* %p) {
+; CHECK-LABEL: @uaddo_1(
+; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: store i8 [[A]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = add i8 %x, 1
+ store i8 %a, i8* %p
+ %c = icmp ult i8 %a, 1
+ ret i1 %c
+}
+
+define i1 @uaddo_neg1(i8 %x, i8* %p) {
+; CHECK-LABEL: @uaddo_neg1(
+; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -1
+; CHECK-NEXT: store i8 [[A]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = add i8 %x, -1
+ store i8 %a, i8* %p
+ %c = icmp ne i8 %a, -1
+ ret i1 %c
+}
+
diff --git a/llvm/test/Transforms/InstCombine/udiv-simplify.ll b/llvm/test/Transforms/InstCombine/udiv-simplify.ll
new file mode 100644
index 00000000000..8fd604b819e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/udiv-simplify.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @test1(i32 %x) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i64 0
+;
+ %y = lshr i32 %x, 1
+ %r = udiv i32 %y, -1
+ %z = sext i32 %r to i64
+ ret i64 %z
+}
+define i64 @test2(i32 %x) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i64 0
+;
+ %y = lshr i32 %x, 31
+ %r = udiv i32 %y, 3
+ %z = sext i32 %r to i64
+ ret i64 %z
+}
+
+; The udiv instructions shouldn't be optimized away because the divisor is a
+; variable, but the sext instructions should be optimized to zext: the lshr
+; guarantees a small dividend, so the quotient's sign bit is known to be zero.
+
+define i64 @test1_PR2274(i32 %x, i32 %g) nounwind {
+; CHECK-LABEL: @test1_PR2274(
+; CHECK-NEXT: [[Y:%.*]] = lshr i32 [[X:%.*]], 30
+; CHECK-NEXT: [[R:%.*]] = udiv i32 [[Y]], [[G:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[R]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %y = lshr i32 %x, 30
+ %r = udiv i32 %y, %g
+ %z = sext i32 %r to i64
+ ret i64 %z
+}
+define i64 @test2_PR2274(i32 %x, i32 %v) nounwind {
+; CHECK-LABEL: @test2_PR2274(
+; CHECK-NEXT: [[Y:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[R:%.*]] = udiv i32 [[Y]], [[V:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[R]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %y = lshr i32 %x, 31
+ %r = udiv i32 %y, %v
+ %z = sext i32 %r to i64
+ ret i64 %z
+}
+
+; The udiv should be simplified according to the rule:
+; X udiv (C1 << N), where C1 is `1<<C2` --> X >> (N+C2)
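+; For example (a worked instance, not one of the tests): with C2 = 2 (so
+; C1 = 4) and N = 1, X udiv (4 << 1) = X udiv 8, and for X = 100 both
+; 100 udiv 8 and 100 >> (1 + 2) evaluate to 12.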
+@b = external global [1 x i16]
+
+define i32 @PR30366(i1 %a) {
+; CHECK-LABEL: @PR30366(
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[A:%.*]] to i32
+; CHECK-NEXT: [[D:%.*]] = lshr i32 [[Z]], zext (i16 ptrtoint ([1 x i16]* @b to i16) to i32)
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %z = zext i1 %a to i32
+ %d = udiv i32 %z, zext (i16 shl (i16 1, i16 ptrtoint ([1 x i16]* @b to i16)) to i32)
+ ret i32 %d
+}
+
+; OSS-Fuzz #4857
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=4857
+define i177 @ossfuzz_4857(i177 %X, i177 %Y) {
+; CHECK-LABEL: @ossfuzz_4857(
+; CHECK-NEXT: store i1 false, i1* undef, align 1
+; CHECK-NEXT: ret i177 0
+;
+ %B5 = udiv i177 %Y, -1
+ %B4 = add i177 %B5, -1
+ %B2 = add i177 %B4, -1
+ %B6 = mul i177 %B5, %B2
+ %B3 = add i177 %B2, %B2
+ %B9 = xor i177 %B4, %B3
+ %B13 = ashr i177 %Y, %B2
+ %B22 = add i177 %B9, %B13
+ %B1 = udiv i177 %B5, %B6
+ %C9 = icmp ult i177 %Y, %B22
+ store i1 %C9, i1* undef
+ ret i177 %B1
+}
+
+define i32 @udiv_demanded(i32 %a) {
+; CHECK-LABEL: @udiv_demanded(
+; CHECK-NEXT: [[U:%.*]] = udiv i32 [[A:%.*]], 12
+; CHECK-NEXT: ret i32 [[U]]
+;
+ %o = or i32 %a, 3
+ %u = udiv i32 %o, 12
+ ret i32 %u
+}
+
+define i32 @udiv_exact_demanded(i32 %a) {
+; CHECK-LABEL: @udiv_exact_demanded(
+; CHECK-NEXT: [[O:%.*]] = and i32 [[A:%.*]], -3
+; CHECK-NEXT: [[U:%.*]] = udiv exact i32 [[O]], 12
+; CHECK-NEXT: ret i32 [[U]]
+;
+ %o = and i32 %a, -3
+ %u = udiv exact i32 %o, 12
+ ret i32 %u
+}
diff --git a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
new file mode 100644
index 00000000000..0996fd59a77
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test that this transform works:
+; udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
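+; For example (illustrative): with X = 100, selecting a divisor of 16 or 8
+; becomes selecting between 100 >> 4 = 6 and 100 >> 3 = 12.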
+
+define i64 @test(i64 %X, i1 %Cond) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[QUOTIENT1:%.*]] = lshr i64 [[X:%.*]], 4
+; CHECK-NEXT: [[QUOTIENT2:%.*]] = lshr i64 [[X]], 3
+; CHECK-NEXT: [[SUM:%.*]] = add nuw nsw i64 [[QUOTIENT1]], [[QUOTIENT2]]
+; CHECK-NEXT: ret i64 [[SUM]]
+;
+ %divisor1 = select i1 %Cond, i64 16, i64 8
+ %quotient1 = udiv i64 %X, %divisor1
+ %divisor2 = select i1 %Cond, i64 8, i64 0
+ %quotient2 = udiv i64 %X, %divisor2
+ %sum = add i64 %quotient1, %quotient2
+ ret i64 %sum
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=34856
+; This would assert/crash because we didn't propagate the condition with the correct vector type.
+
+define <2 x i32> @PR34856(<2 x i32> %t0, <2 x i32> %t1) {
+; CHECK-LABEL: @PR34856(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], <i32 -8, i32 -8>
+; CHECK-NEXT: [[DIV1:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[DIV1]]
+;
+ %cmp = icmp eq <2 x i32> %t0, <i32 1, i32 1>
+ %zext = zext <2 x i1> %cmp to <2 x i32>
+ %neg = select <2 x i1> %cmp, <2 x i32> zeroinitializer, <2 x i32> <i32 -7, i32 -7>
+ %div1 = udiv <2 x i32> %t1, %neg
+ %use_cmp_again = add <2 x i32> %div1, %zext
+ ret <2 x i32> %use_cmp_again
+}
+
diff --git a/llvm/test/Transforms/InstCombine/udivrem-change-width.ll b/llvm/test/Transforms/InstCombine/udivrem-change-width.ll
new file mode 100644
index 00000000000..d96f9debade
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/udivrem-change-width.ll
@@ -0,0 +1,288 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "n8:32"
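+
+; The "n8:32" datalayout marks i8 and i32 as native integer widths, so a
+; udiv/urem of operands zero-extended from i8 can profitably be narrowed back
+; to i8, with the result zero-extended, unless the extended operands have
+; other uses (descriptive note inferred from the tests below).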
+
+; PR4548
+define i8 @udiv_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: @udiv_i8(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 %a, %b
+; CHECK-NEXT: ret i8 [[DIV]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %udiv = udiv i32 %za, %zb
+ %conv3 = trunc i32 %udiv to i8
+ ret i8 %conv3
+}
+
+define <2 x i8> @udiv_i8_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @udiv_i8_vec(
+; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> %a, %b
+; CHECK-NEXT: ret <2 x i8> [[DIV]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %zb = zext <2 x i8> %b to <2 x i32>
+ %udiv = udiv <2 x i32> %za, %zb
+ %conv3 = trunc <2 x i32> %udiv to <2 x i8>
+ ret <2 x i8> %conv3
+}
+
+define i8 @urem_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: @urem_i8(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 %a, %b
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %udiv = urem i32 %za, %zb
+ %conv3 = trunc i32 %udiv to i8
+ ret i8 %conv3
+}
+
+define <2 x i8> @urem_i8_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @urem_i8_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> %a, %b
+; CHECK-NEXT: ret <2 x i8> [[TMP1]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %zb = zext <2 x i8> %b to <2 x i32>
+ %udiv = urem <2 x i32> %za, %zb
+ %conv3 = trunc <2 x i32> %udiv to <2 x i8>
+ ret <2 x i8> %conv3
+}
+
+define i32 @udiv_i32(i8 %a, i8 %b) {
+; CHECK-LABEL: @udiv_i32(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 %a, %b
+; CHECK-NEXT: [[UDIV:%.*]] = zext i8 [[DIV]] to i32
+; CHECK-NEXT: ret i32 [[UDIV]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %udiv = udiv i32 %za, %zb
+ ret i32 %udiv
+}
+
+define <2 x i32> @udiv_i32_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @udiv_i32_vec(
+; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i8> %a, %b
+; CHECK-NEXT: [[UDIV:%.*]] = zext <2 x i8> [[DIV]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[UDIV]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %zb = zext <2 x i8> %b to <2 x i32>
+ %udiv = udiv <2 x i32> %za, %zb
+ ret <2 x i32> %udiv
+}
+
+define i32 @udiv_i32_multiuse(i8 %a, i8 %b) {
+; CHECK-LABEL: @udiv_i32_multiuse(
+; CHECK-NEXT: [[ZA:%.*]] = zext i8 %a to i32
+; CHECK-NEXT: [[ZB:%.*]] = zext i8 %b to i32
+; CHECK-NEXT: [[UDIV:%.*]] = udiv i32 [[ZA]], [[ZB]]
+; CHECK-NEXT: [[EXTRA_USES:%.*]] = add nuw nsw i32 [[ZA]], [[ZB]]
+; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i32 [[UDIV]], [[EXTRA_USES]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %udiv = udiv i32 %za, %zb
+ %extra_uses = add i32 %za, %zb
+ %r = mul i32 %udiv, %extra_uses
+ ret i32 %r
+}
+
+define i32 @udiv_illegal_type(i9 %a, i9 %b) {
+; CHECK-LABEL: @udiv_illegal_type(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i9 %a, %b
+; CHECK-NEXT: [[UDIV:%.*]] = zext i9 [[DIV]] to i32
+; CHECK-NEXT: ret i32 [[UDIV]]
+;
+ %za = zext i9 %a to i32
+ %zb = zext i9 %b to i32
+ %udiv = udiv i32 %za, %zb
+ ret i32 %udiv
+}
+
+define i32 @urem_i32(i8 %a, i8 %b) {
+; CHECK-LABEL: @urem_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 %a, %b
+; CHECK-NEXT: [[UREM:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UREM]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %urem = urem i32 %za, %zb
+ ret i32 %urem
+}
+
+define <2 x i32> @urem_i32_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @urem_i32_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> %a, %b
+; CHECK-NEXT: [[UREM:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[UREM]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %zb = zext <2 x i8> %b to <2 x i32>
+ %urem = urem <2 x i32> %za, %zb
+ ret <2 x i32> %urem
+}
+
+define i32 @urem_i32_multiuse(i8 %a, i8 %b) {
+; CHECK-LABEL: @urem_i32_multiuse(
+; CHECK-NEXT: [[ZA:%.*]] = zext i8 %a to i32
+; CHECK-NEXT: [[ZB:%.*]] = zext i8 %b to i32
+; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[ZA]], [[ZB]]
+; CHECK-NEXT: [[EXTRA_USES:%.*]] = add nuw nsw i32 [[ZA]], [[ZB]]
+; CHECK-NEXT: [[R:%.*]] = mul nuw nsw i32 [[UREM]], [[EXTRA_USES]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %za = zext i8 %a to i32
+ %zb = zext i8 %b to i32
+ %urem = urem i32 %za, %zb
+ %extra_uses = add i32 %za, %zb
+ %r = mul i32 %urem, %extra_uses
+ ret i32 %r
+}
+
+define i32 @urem_illegal_type(i9 %a, i9 %b) {
+; CHECK-LABEL: @urem_illegal_type(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i9 %a, %b
+; CHECK-NEXT: [[UREM:%.*]] = zext i9 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UREM]]
+;
+ %za = zext i9 %a to i32
+ %zb = zext i9 %b to i32
+ %urem = urem i32 %za, %zb
+ ret i32 %urem
+}
+
+define i32 @udiv_i32_c(i8 %a) {
+; CHECK-LABEL: @udiv_i32_c(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i8 %a, 10
+; CHECK-NEXT: [[UDIV:%.*]] = zext i8 [[DIV]] to i32
+; CHECK-NEXT: ret i32 [[UDIV]]
+;
+ %za = zext i8 %a to i32
+ %udiv = udiv i32 %za, 10
+ ret i32 %udiv
+}
+
+define <2 x i32> @udiv_i32_c_vec(<2 x i8> %a) {
+; CHECK-LABEL: @udiv_i32_c_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i8> %a, <i8 10, i8 17>
+; CHECK-NEXT: [[UDIV:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[UDIV]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %udiv = udiv <2 x i32> %za, <i32 10, i32 17>
+ ret <2 x i32> %udiv
+}
+
+define i32 @udiv_i32_c_multiuse(i8 %a) {
+; CHECK-LABEL: @udiv_i32_c_multiuse(
+; CHECK-NEXT: [[ZA:%.*]] = zext i8 %a to i32
+; CHECK-NEXT: [[UDIV:%.*]] = udiv i32 [[ZA]], 10
+; CHECK-NEXT: [[EXTRA_USE:%.*]] = add nuw nsw i32 [[UDIV]], [[ZA]]
+; CHECK-NEXT: ret i32 [[EXTRA_USE]]
+;
+ %za = zext i8 %a to i32
+ %udiv = udiv i32 %za, 10
+ %extra_use = add i32 %za, %udiv
+ ret i32 %extra_use
+}
+
+define i32 @udiv_illegal_type_c(i9 %a) {
+; CHECK-LABEL: @udiv_illegal_type_c(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i9 %a, 10
+; CHECK-NEXT: [[UDIV:%.*]] = zext i9 [[DIV]] to i32
+; CHECK-NEXT: ret i32 [[UDIV]]
+;
+ %za = zext i9 %a to i32
+ %udiv = udiv i32 %za, 10
+ ret i32 %udiv
+}
+
+define i32 @urem_i32_c(i8 %a) {
+; CHECK-LABEL: @urem_i32_c(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 %a, 10
+; CHECK-NEXT: [[UREM:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UREM]]
+;
+ %za = zext i8 %a to i32
+ %urem = urem i32 %za, 10
+ ret i32 %urem
+}
+
+define <2 x i32> @urem_i32_c_vec(<2 x i8> %a) {
+; CHECK-LABEL: @urem_i32_c_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> %a, <i8 10, i8 17>
+; CHECK-NEXT: [[UREM:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[UREM]]
+;
+ %za = zext <2 x i8> %a to <2 x i32>
+ %urem = urem <2 x i32> %za, <i32 10, i32 17>
+ ret <2 x i32> %urem
+}
+
+define i32 @urem_i32_c_multiuse(i8 %a) {
+; CHECK-LABEL: @urem_i32_c_multiuse(
+; CHECK-NEXT: [[ZA:%.*]] = zext i8 %a to i32
+; CHECK-NEXT: [[UREM:%.*]] = urem i32 [[ZA]], 10
+; CHECK-NEXT: [[EXTRA_USE:%.*]] = add nuw nsw i32 [[UREM]], [[ZA]]
+; CHECK-NEXT: ret i32 [[EXTRA_USE]]
+;
+ %za = zext i8 %a to i32
+ %urem = urem i32 %za, 10
+ %extra_use = add i32 %za, %urem
+ ret i32 %extra_use
+}
+
+define i32 @urem_illegal_type_c(i9 %a) {
+; CHECK-LABEL: @urem_illegal_type_c(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i9 %a, 10
+; CHECK-NEXT: [[UREM:%.*]] = zext i9 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UREM]]
+;
+ %za = zext i9 %a to i32
+ %urem = urem i32 %za, 10
+ ret i32 %urem
+}
+
+define i32 @udiv_c_i32(i8 %a) {
+; CHECK-LABEL: @udiv_c_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i8 10, %a
+; CHECK-NEXT: [[UDIV:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UDIV]]
+;
+ %za = zext i8 %a to i32
+ %udiv = udiv i32 10, %za
+ ret i32 %udiv
+}
+
+define i32 @urem_c_i32(i8 %a) {
+; CHECK-LABEL: @urem_c_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 10, %a
+; CHECK-NEXT: [[UREM:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[UREM]]
+;
+ %za = zext i8 %a to i32
+ %urem = urem i32 10, %za
+ ret i32 %urem
+}
+
+; Make sure constexpr is handled.
+
+@b = external global [1 x i8]
+
+define i32 @udiv_constexpr(i8 %a) {
+; CHECK-LABEL: @udiv_constexpr(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i8 %a, ptrtoint ([1 x i8]* @b to i8)
+; CHECK-NEXT: [[D:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %za = zext i8 %a to i32
+ %d = udiv i32 %za, zext (i8 ptrtoint ([1 x i8]* @b to i8) to i32)
+ ret i32 %d
+}
+
diff --git a/llvm/test/Transforms/InstCombine/umax-icmp.ll b/llvm/test/Transforms/InstCombine/umax-icmp.ll
new file mode 100644
index 00000000000..eabd41ceb62
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/umax-icmp.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; If we have a umax feeding an unsigned or equality icmp that shares an
+; operand with the umax, the compare should always be folded.
+; Test all 4 foldable predicates (eq,ne,ugt,ule) * 4 commutation
+; possibilities for each predicate. Note that folds to true/false
+; (predicate = uge/ult) or folds to an existing instruction should be
+; handled by InstSimplify.
+
+; umax(X, Y) == X --> X >= Y
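+; For example: umax(5, 3) == 5 and 5 >= 3 are both true, while
+; umax(3, 5) == 3 and 3 >= 5 are both false.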
+
+define i1 @eq_umax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_umax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @eq_umax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_umax2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @eq_umax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_umax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @eq_umax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_umax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umax(X, Y) <= X --> X >= Y
+
+define i1 @ule_umax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ule_umax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ule i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ule_umax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ule_umax2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ule i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @ule_umax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ule_umax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp uge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ule_umax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ule_umax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp uge i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umax(X, Y) != X --> X < Y
+
+define i1 @ne_umax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_umax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ne_umax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_umax2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @ne_umax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_umax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ne_umax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_umax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umax(X, Y) > X --> X < Y
+
+define i1 @ugt_umax1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ugt_umax1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ugt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ugt_umax2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ugt_umax2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ugt i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the max op to the RHS.
+
+define i1 @ugt_umax3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ugt_umax3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ult i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute max operands.
+
+define i1 @ugt_umax4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ugt_umax4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ugt i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ult i32 %x, %sel
+ ret i1 %cmp2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/umin-icmp.ll b/llvm/test/Transforms/InstCombine/umin-icmp.ll
new file mode 100644
index 00000000000..47954be5ab4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/umin-icmp.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; If we have a umin feeding an unsigned or equality icmp that shares an
+; operand with the umin, the compare should always be folded.
+; Test all 4 foldable predicates (eq,ne,uge,ult) * 4 commutation
+; possibilities for each predicate. Note that folds to true/false
+; (predicate is ule/ugt) or folds to an existing instruction should be
+; handled by InstSimplify.
+
+; umin(X, Y) == X --> X <= Y
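+; For example: umin(3, 5) == 3 and 3 <= 5 are both true, while
+; umin(5, 3) == 5 and 5 <= 3 are both false.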
+
+define i1 @eq_umin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_umin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @eq_umin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @eq_umin2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @eq_umin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_umin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @eq_umin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @eq_umin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp eq i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umin(X, Y) >= X --> X <= Y
+
+define i1 @uge_umin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @uge_umin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp uge i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @uge_umin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @uge_umin2(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp uge i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @uge_umin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @uge_umin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ule i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @uge_umin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @uge_umin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ule i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umin(X, Y) != X --> X > Y
+
+define i1 @ne_umin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_umin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ne_umin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ne_umin2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @ne_umin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_umin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ne_umin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ne_umin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ne i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; umin(X, Y) < X --> X > Y
+
+define i1 @ult_umin1(i32 %x, i32 %y) {
+; CHECK-LABEL: @ult_umin1(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ult i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ult_umin2(i32 %x, i32 %y) {
+; CHECK-LABEL: @ult_umin2(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 %y, %x
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ult i32 %sel, %x
+ ret i1 %cmp2
+}
+
+; Disguise the icmp predicate by commuting the min op to the RHS.
+
+define i1 @ult_umin3(i32 %a, i32 %y) {
+; CHECK-LABEL: @ult_umin3(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %x, %y
+ %sel = select i1 %cmp1, i32 %x, i32 %y
+ %cmp2 = icmp ugt i32 %x, %sel
+ ret i1 %cmp2
+}
+
+; Commute min operands.
+
+define i1 @ult_umin4(i32 %a, i32 %y) {
+; CHECK-LABEL: @ult_umin4(
+; CHECK-NEXT: [[X:%.*]] = add i32 %a, 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], %y
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %x = add i32 %a, 3 ; thwart complexity-based canonicalization
+ %cmp1 = icmp ult i32 %y, %x
+ %sel = select i1 %cmp1, i32 %y, i32 %x
+ %cmp2 = icmp ugt i32 %x, %sel
+ ret i1 %cmp2
+}
+
diff --git a/llvm/test/Transforms/InstCombine/unavailable-debug.ll b/llvm/test/Transforms/InstCombine/unavailable-debug.ll
new file mode 100644
index 00000000000..703c1c2898f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unavailable-debug.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure to update the debug value after dead code elimination.
+; CHECK: %call = call signext i8 @b(i32 6), !dbg !39
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !30, metadata !DIExpression()), !dbg !38
+
+@e = common local_unnamed_addr global i8 0, align 1, !dbg !0
+@c = common local_unnamed_addr global i32 0, align 4, !dbg !6
+@d = common local_unnamed_addr global i32 0, align 4, !dbg !10
+
+define signext i8 @b(i32 %f) local_unnamed_addr #0 !dbg !18 {
+entry:
+ call void @llvm.dbg.value(metadata i32 %f, metadata !22, metadata !DIExpression()), !dbg !23
+ %conv = trunc i32 %f to i8, !dbg !24
+ ret i8 %conv, !dbg !25
+}
+
+define i32 @main() local_unnamed_addr #0 !dbg !26 {
+entry:
+ %0 = load i8, i8* @e, align 1, !dbg !31, !tbaa !32
+ %conv = sext i8 %0 to i32, !dbg !31
+ store i32 %conv, i32* @c, align 4, !dbg !35, !tbaa !36
+ call void @llvm.dbg.value(metadata i32 -1372423381, metadata !30, metadata !DIExpression()), !dbg !38
+ %call = call signext i8 @b(i32 6), !dbg !39
+ %conv1 = sext i8 %call to i32, !dbg !39
+ call void @llvm.dbg.value(metadata i32 %conv1, metadata !30, metadata !DIExpression()), !dbg !38
+ %1 = load i32, i32* @d, align 4, !dbg !40, !tbaa !36
+ %call2 = call i32 (...) @optimize_me_not(), !dbg !41
+ ret i32 0, !dbg !42
+}
+
+declare i32 @optimize_me_not(...) local_unnamed_addr #1
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!13, !14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "e", scope: !2, file: !3, line: 3, type: !12, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project b306ef12f046353ea5bda4b3b77759e57909a0db)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
+!3 = !DIFile(filename: "a.c", directory: "/Users/davide/llvm/build/bin")
+!4 = !{}
+!5 = !{!6, !10, !0}
+!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression())
+!7 = distinct !DIGlobalVariable(name: "c", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
+!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "a", file: !3, line: 1, baseType: !9)
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
+!11 = distinct !DIGlobalVariable(name: "d", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true)
+!12 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"wchar_size", i32 4}
+!16 = !{i32 7, !"PIC Level", i32 2}
+!17 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project b306ef12f046353ea5bda4b3b77759e57909a0db)"}
+!18 = distinct !DISubprogram(name: "b", scope: !3, file: !3, line: 4, type: !19, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!12, !9}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "f", arg: 1, scope: !18, file: !3, line: 4, type: !9)
+!23 = !DILocation(line: 4, column: 9, scope: !18)
+!24 = !DILocation(line: 4, column: 21, scope: !18)
+!25 = !DILocation(line: 4, column: 14, scope: !18)
+!26 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 5, type: !27, scopeLine: 5, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !29)
+!27 = !DISubroutineType(types: !28)
+!28 = !{!9}
+!29 = !{!30}
+!30 = !DILocalVariable(name: "l_1499", scope: !26, file: !3, line: 7, type: !8)
+!31 = !DILocation(line: 6, column: 7, scope: !26)
+!32 = !{!33, !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !DILocation(line: 6, column: 5, scope: !26)
+!36 = !{!37, !37, i64 0}
+!37 = !{!"int", !33, i64 0}
+!38 = !DILocation(line: 7, column: 5, scope: !26)
+!39 = !DILocation(line: 8, column: 12, scope: !26)
+!40 = !DILocation(line: 9, column: 11, scope: !26)
+!41 = !DILocation(line: 10, column: 3, scope: !26)
+!42 = !DILocation(line: 11, column: 1, scope: !26)
diff --git a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll
new file mode 100644
index 00000000000..b3bc74d3cfe
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-scalar.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge, in the form of: (M is constant)
+; ((x ^ y) & M) ^ y
+; Unfold it to
+; (x & M) | (y & ~M)
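+;
+; For example, with i4 values x = 5 (0101), y = 3 (0011) and M = 1:
+;   ((x ^ y) & M) ^ y  = (0110 & 0001) ^ 0011 = 0011
+;   (x & M) | (y & ~M) = 0001 | 0010          = 0011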
+
+define i4 @scalar0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @scalar0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+define i4 @scalar1 (i4 %x, i4 %y) {
+; CHECK-LABEL: @scalar1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Various cases with %x and/or %y being a constant
+; ============================================================================ ;
+
+define i4 @in_constant_varx_mone(i4 %x, i4 %mask) {
+; CHECK-LABEL: @in_constant_varx_mone(
+; CHECK-NEXT: [[R1:%.*]] = or i4 [[X:%.*]], -2
+; CHECK-NEXT: ret i4 [[R1]]
+;
+ %n0 = xor i4 %x, -1 ; %x
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %n1, -1
+ ret i4 %r
+}
+
+define i4 @in_constant_varx_14(i4 %x, i4 %mask) {
+; CHECK-LABEL: @in_constant_varx_14(
+; CHECK-NEXT: [[R1:%.*]] = or i4 [[X:%.*]], -2
+; CHECK-NEXT: ret i4 [[R1]]
+;
+ %n0 = xor i4 %x, 14 ; %x
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %n1, 14
+ ret i4 %r
+}
+
+define i4 @in_constant_mone_vary(i4 %y, i4 %mask) {
+; CHECK-LABEL: @in_constant_mone_vary(
+; CHECK-NEXT: [[N0:%.*]] = and i4 [[Y:%.*]], 1
+; CHECK-NEXT: [[N1:%.*]] = xor i4 [[N0]], 1
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %y, -1 ; %x
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+define i4 @in_constant_14_vary(i4 %y, i4 %mask) {
+; CHECK-LABEL: @in_constant_14_vary(
+; CHECK-NEXT: [[R:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %y, 14 ; %x
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Commutativity
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare i4 @gen4()
+
+define i4 @c_1_0_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+define i4 @c_0_1_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %x ; %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_0_0_1 () {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %x = call i4 @gen4()
+ %y = call i4 @gen4()
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %y, %n1 ; swapped order
+ ret i4 %r
+}
+
+define i4 @c_1_1_0 (i4 %x, i4 %y) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %x ; %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_1_0_1 (i4 %x) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[X:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[Y]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %y = call i4 @gen4()
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %y, %n1 ; swapped order
+ ret i4 %r
+}
+
+define i4 @c_0_1_1 (i4 %y) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %x = call i4 @gen4()
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %x, %n1 ; swapped order, %x instead of %y
+ ret i4 %r
+}
+
+define i4 @c_1_1_1 () {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT: [[X:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call i4 @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and i4 [[Y]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i4 [[X]], 1
+; CHECK-NEXT: [[R:%.*]] = or i4 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %x = call i4 @gen4()
+ %y = call i4 @gen4()
+ %n0 = xor i4 %y, %x ; swapped order
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %x, %n1 ; swapped order, %x instead of %y
+ ret i4 %r
+}
+
+define i4 @commutativity_constant_14_vary(i4 %y, i4 %mask) {
+; CHECK-LABEL: @commutativity_constant_14_vary(
+; CHECK-NEXT: [[R:%.*]] = and i4 [[Y:%.*]], -2
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %y, 14 ; %x
+ %n1 = and i4 %n0, 1
+ %r = xor i4 %y, %n1 ; swapped
+ ret i4 %r
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use4(i4)
+
+define i4 @n_oneuse_D (i4 %x, i4 %y) {
+; CHECK-LABEL: @n_oneuse_D(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(i4 [[N0]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y ; two uses of %n0, which is going to be replaced
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n0)
+ ret i4 %r
+}
+
+define i4 @n_oneuse_A (i4 %x, i4 %y) {
+; CHECK-LABEL: @n_oneuse_A(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(i4 [[N1]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2 ; two uses of %n1, which is going to be replaced
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n1)
+ ret i4 %r
+}
+
+define i4 @n_oneuse_AD (i4 %x, i4 %y) {
+; CHECK-LABEL: @n_oneuse_AD(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(i4 [[N0]])
+; CHECK-NEXT: call void @use4(i4 [[N1]])
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2 ; two uses of %n1, which is going to be replaced
+ %r = xor i4 %n1, %y
+ call void @use4(i4 %n0)
+ call void @use4(i4 %n1)
+ ret i4 %r
+}
+
+; Mask is not constant
+
+define i4 @n_var_mask (i4 %x, i4 %y, i4 %m) {
+; CHECK-LABEL: @n_var_mask(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Y]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, %m
+ %r = xor i4 %n1, %y
+ ret i4 %r
+}
+
+; Some third variable is used
+
+define i4 @n_third_var (i4 %x, i4 %y, i4 %z) {
+; CHECK-LABEL: @n_third_var(
+; CHECK-NEXT: [[N0:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and i4 [[N0]], -2
+; CHECK-NEXT: [[R:%.*]] = xor i4 [[N1]], [[Z:%.*]]
+; CHECK-NEXT: ret i4 [[R]]
+;
+ %n0 = xor i4 %x, %y
+ %n1 = and i4 %n0, -2
+ %r = xor i4 %n1, %z ; not %x or %y
+ ret i4 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll
new file mode 100644
index 00000000000..f87b52b9743
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unfold-masked-merge-with-const-mask-vector.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; If we have a masked merge of the form (where M is a constant):
+;   ((x ^ y) & M) ^ y
+; unfold it to:
+;   (x & M) | (y & ~M)
+
+define <2 x i4> @splat (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @splat(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @splat_undef (<3 x i4> %x, <3 x i4> %y) {
+; CHECK-LABEL: @splat_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[X:%.*]], <i4 -2, i4 undef, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i4> [[Y:%.*]], <i4 1, i4 undef, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %n0 = xor <3 x i4> %x, %y
+ %n1 = and <3 x i4> %n0, <i4 -2, i4 undef, i4 -2>
+ %r = xor <3 x i4> %n1, %y
+ ret <3 x i4> %r
+}
+
+define <2 x i4> @nonsplat (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], <i4 -2, i4 1>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], <i4 1, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 1>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+; ============================================================================ ;
+; Various cases with %x and/or %y being a constant
+; ============================================================================ ;
+
+define <2 x i4> @in_constant_varx_mone(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_mone(
+; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: ret <2 x i4> [[R1]]
+;
+ %n0 = xor <2 x i4> %x, <i4 -1, i4 -1> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, <i4 -1, i4 -1>
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_varx_14(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_14(
+; CHECK-NEXT: [[R1:%.*]] = or <2 x i4> [[X:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: ret <2 x i4> [[R1]]
+;
+ %n0 = xor <2 x i4> %x, <i4 14, i4 14> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, <i4 14, i4 14>
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_varx_14_nonsplat(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_14_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], <i4 -2, i4 6>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, <i4 14, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, <i4 14, i4 7>
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @in_constant_varx_14_undef(<3 x i4> %x, <3 x i4> %mask) {
+; CHECK-LABEL: @in_constant_varx_14_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[X:%.*]], <i4 1, i4 undef, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], <i4 -2, i4 undef, i4 6>
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %n0 = xor <3 x i4> %x, <i4 14, i4 undef, i4 7> ; %x
+ %n1 = and <3 x i4> %n0, <i4 1, i4 undef, i4 1>
+ %r = xor <3 x i4> %n1, <i4 14, i4 undef, i4 7>
+ ret <3 x i4> %r
+}
+
+define <2 x i4> @in_constant_mone_vary(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_mone_vary(
+; CHECK-NEXT: [[N0:%.*]] = and <2 x i4> [[Y:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[N1:%.*]] = xor <2 x i4> [[N0]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, <i4 -1, i4 -1> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_14_vary(
+; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, <i4 14, i4 14> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @in_constant_14_vary_nonsplat(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @in_constant_14_vary_nonsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], <i4 0, i4 1>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, <i4 14, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <3 x i4> @in_constant_14_vary_undef(<3 x i4> %y, <3 x i4> %mask) {
+; CHECK-LABEL: @in_constant_14_vary_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[Y:%.*]], <i4 -2, i4 undef, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = or <3 x i4> [[TMP1]], <i4 0, i4 undef, i4 1>
+; CHECK-NEXT: ret <3 x i4> [[R]]
+;
+ %n0 = xor <3 x i4> %y, <i4 14, i4 undef, i4 7> ; %x
+ %n1 = and <3 x i4> %n0, <i4 1, i4 undef, i4 1>
+ %r = xor <3 x i4> %n1, %y
+ ret <3 x i4> %r
+}
+
+; ============================================================================ ;
+; Commutativity
+; ============================================================================ ;
+
+; Used to make sure that the IR complexity sorting does not interfere.
+declare <2 x i4> @gen4()
+
+define <2 x i4> @c_1_0_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_1_0_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_0_1_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %n1, %x ; %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_0_1 () {
+; CHECK-LABEL: @c_0_0_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %x = call <2 x i4> @gen4()
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %y, %n1 ; swapped order
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_0 (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @c_1_1_0(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X:%.*]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %n1, %x ; %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_0_1 (<2 x i4> %x) {
+; CHECK-LABEL: @c_1_0_1(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[X:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[Y]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %y, %n1 ; swapped order
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_0_1_1 (<2 x i4> %y) {
+; CHECK-LABEL: @c_0_1_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %x = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @c_1_1_1 () {
+; CHECK-LABEL: @c_1_1_1(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i4> @gen4()
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i4> [[Y]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i4> [[X]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i4> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %x = call <2 x i4> @gen4()
+ %y = call <2 x i4> @gen4()
+ %n0 = xor <2 x i4> %y, %x ; swapped order
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %x, %n1 ; swapped order, %x instead of %y
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @commutativity_constant_14_vary(<2 x i4> %y, <2 x i4> %mask) {
+; CHECK-LABEL: @commutativity_constant_14_vary(
+; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[Y:%.*]], <i4 -2, i4 -2>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %y, <i4 14, i4 14> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %y, %n1 ; swapped
+ ret <2 x i4> %r
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; One use only.
+
+declare void @use4(<2 x i4>)
+
+define <2 x i4> @n_oneuse_D (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @n_oneuse_D(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N0]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y ; two uses of %n0, which is going to be replaced
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2>
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n0)
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @n_oneuse_A (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @n_oneuse_A(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N1]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2> ; two uses of %n1, which is going to be replaced
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n1)
+ ret <2 x i4> %r
+}
+
+define <2 x i4> @n_oneuse_AD (<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @n_oneuse_AD(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], <i4 -2, i4 -2>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: call void @use4(<2 x i4> [[N0]])
+; CHECK-NEXT: call void @use4(<2 x i4> [[N1]])
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, <i4 -2, i4 -2> ; two uses of %n1, which is going to be replaced
+ %r = xor <2 x i4> %n1, %y
+ call void @use4(<2 x i4> %n0)
+ call void @use4(<2 x i4> %n1)
+ ret <2 x i4> %r
+}
+
+; Mask is not constant
+
+define <2 x i4> @n_var_mask (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) {
+; CHECK-LABEL: @n_var_mask(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], [[M:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], [[Y]]
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, %y
+ %n1 = and <2 x i4> %n0, %m
+ %r = xor <2 x i4> %n1, %y
+ ret <2 x i4> %r
+}
+
+; The constants standing in for %y differ between the two xors
+
+define <2 x i4> @n_differenty(<2 x i4> %x, <2 x i4> %mask) {
+; CHECK-LABEL: @n_differenty(
+; CHECK-NEXT: [[N0:%.*]] = xor <2 x i4> [[X:%.*]], <i4 -2, i4 7>
+; CHECK-NEXT: [[N1:%.*]] = and <2 x i4> [[N0]], <i4 1, i4 1>
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[N1]], <i4 7, i4 -2>
+; CHECK-NEXT: ret <2 x i4> [[R]]
+;
+ %n0 = xor <2 x i4> %x, <i4 14, i4 7> ; %x
+ %n1 = and <2 x i4> %n0, <i4 1, i4 1>
+ %r = xor <2 x i4> %n1, <i4 7, i4 14>
+ ret <2 x i4> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/unlocked-stdio-mingw.ll b/llvm/test/Transforms/InstCombine/unlocked-stdio-mingw.ll
new file mode 100644
index 00000000000..f41b86654cb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unlocked-stdio-mingw.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -mtriple=x86_64-w64-mingw32 | FileCheck %s
+
+%struct._iobuf = type { i8*, i32, i8*, i32, i32, i32, i32, i8* }
+
+@.str = private unnamed_addr constant [5 x i8] c"file\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1
+
+; Check that this still uses the plain fputc instead of fputc_unlocked
+; for MinGW targets.
+define void @external_fputc_test() {
+; CHECK-LABEL: @external_fputc_test(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._iobuf* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @fputc(i32 99, %struct._iobuf* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._iobuf* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = call i32 @fputc(i32 99, %struct._iobuf* %call)
+ ret void
+}
+
+declare %struct._iobuf* @fopen(i8*, i8*)
+declare i32 @fputc(i32, %struct._iobuf* nocapture)
diff --git a/llvm/test/Transforms/InstCombine/unlocked-stdio.ll b/llvm/test/Transforms/InstCombine/unlocked-stdio.ll
new file mode 100644
index 00000000000..f10772bc802
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unlocked-stdio.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@.str = private unnamed_addr constant [5 x i8] c"file\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1
+@.str.2 = private unnamed_addr constant [4 x i8] c"str\00", align 1
+@stdout = external global %struct._IO_FILE*, align 8
+@global_file = common global %struct._IO_FILE* null, align 8
+
+define void @external_fgetc_test(%struct._IO_FILE* %f) {
+; CHECK-LABEL: @external_fgetc_test(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @fgetc(%struct._IO_FILE* [[F:%.*]])
+; CHECK-NEXT: ret void
+;
+ %call = call i32 @fgetc(%struct._IO_FILE* %f)
+ ret void
+}
+
+declare i32 @fgetc(%struct._IO_FILE* nocapture) #0
+
+define void @external_fgetc_test2() {
+; CHECK-LABEL: @external_fgetc_test2(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[FPUTC_UNLOCKED:%.*]] = call i32 @fputc_unlocked(i32 99, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = call i32 @fputc(i32 99, %struct._IO_FILE* %call)
+ ret void
+}
+
+declare %struct._IO_FILE* @fopen(i8*, i8*)
+declare i32 @fputc(i32, %struct._IO_FILE* nocapture) #0
+
+define internal void @fgetc_test() {
+; CHECK-LABEL: @fgetc_test(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[FGETC_UNLOCKED:%.*]] = call i32 @fgetc_unlocked(%struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = call i32 @fgetc(%struct._IO_FILE* %call)
+ ret void
+}
+
+define void @external_fgetc_internal_test() {
+; CHECK-LABEL: @external_fgetc_internal_test(
+; CHECK-NEXT: call void @fgetc_test()
+; CHECK-NEXT: ret void
+;
+ call void @fgetc_test()
+ ret void
+}
+
+define internal void @fwrite_test() {
+; CHECK-LABEL: @fwrite_test(
+; CHECK-NEXT: [[S:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[S]], i64 0, i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @fwrite_unlocked(i8* nonnull [[ARRAYDECAY]], i64 10, i64 10, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %s = alloca [10 x i8], align 1
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %s, i64 0, i64 0
+ %call1 = call i64 @fwrite(i8* nonnull %arraydecay, i64 10, i64 10, %struct._IO_FILE* %call)
+ ret void
+}
+
+define internal void @fread_test() {
+; CHECK-LABEL: @fread_test(
+; CHECK-NEXT: [[S:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[S]], i64 0, i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @fread_unlocked(i8* nonnull [[ARRAYDECAY]], i64 10, i64 10, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %s = alloca [10 x i8], align 1
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %s, i64 0, i64 0
+ %call1 = call i64 @fread(i8* nonnull %arraydecay, i64 10, i64 10, %struct._IO_FILE* %call)
+ ret void
+}
+
+define internal void @fputs_test() {
+; CHECK-LABEL: @fputs_test(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @fwrite_unlocked(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = call i32 @fputs(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), %struct._IO_FILE* %call)
+ ret void
+}
+
+define internal void @fgets_test() {
+; CHECK-LABEL: @fgets_test(
+; CHECK-NEXT: [[BUF:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[BUF]], i64 0, i64 0
+; CHECK-NEXT: [[FGETS_UNLOCKED:%.*]] = call i8* @fgets_unlocked(i8* nonnull [[ARRAYDECAY]], i32 10, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %buf = alloca [10 x i8], align 1
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %buf, i64 0, i64 0
+ %call1 = call i8* @fgets(i8* nonnull %arraydecay, i32 10, %struct._IO_FILE* %call)
+ ret void
+}
+
+define internal void @fputc_test() {
+; CHECK-LABEL: @fputc_test(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[FPUTC_UNLOCKED:%.*]] = call i32 @fputc_unlocked(i32 99, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = call i32 @fputc(i32 99, %struct._IO_FILE* %call)
+ ret void
+}
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: call void @fwrite_test()
+; CHECK-NEXT: call void @fread_test()
+; CHECK-NEXT: call void @fputs_test()
+; CHECK-NEXT: call void @fgets_test()
+; CHECK-NEXT: call void @fputc_test()
+; CHECK-NEXT: call void @fgetc_test()
+; CHECK-NEXT: ret i32 0
+;
+ call void @fwrite_test()
+ call void @fread_test()
+ call void @fputs_test()
+ call void @fgets_test()
+ call void @fputc_test()
+ call void @fgetc_test()
+ ret i32 0
+}
+
+declare i32 @fclose(%struct._IO_FILE* nocapture)
+
+define void @test_with_fclose() {
+; CHECK-LABEL: @test_with_fclose(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @fwrite_unlocked(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @fclose(%struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0)) #2
+ %call1 = call i64 @fwrite(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* %call)
+ %call2 = call i32 @fclose(%struct._IO_FILE* %call) #2
+ ret void
+}
+
+declare void @modify_file(%struct._IO_FILE*)
+
+define void @test_captured_by_function(){
+; CHECK-LABEL: @test_captured_by_function(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: call void @modify_file(%struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: [[CALL1:%.*]] = call i64 @fwrite(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @fclose(%struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0)) #2
+ call void @modify_file(%struct._IO_FILE* %call) #2
+ %call1 = call i64 @fwrite(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* %call)
+ %call2 = call i32 @fclose(%struct._IO_FILE* %call) #2
+ ret void
+}
+
+define void @test_captured_by_global_value() {
+; CHECK-LABEL: @test_captured_by_global_value(
+; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[DOTCAST:%.*]] = ptrtoint %struct._IO_FILE* [[CALL]] to i64
+; CHECK-NEXT: store i64 [[DOTCAST]], i64* bitcast (%struct._IO_FILE** @global_file to i64*), align 8
+; CHECK-NEXT: [[CALL1:%.*]] = call i64 @fwrite(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @fclose(%struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: ret void
+;
+ %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0)) #2
+ %.cast = ptrtoint %struct._IO_FILE* %call to i64
+ store i64 %.cast, i64* bitcast (%struct._IO_FILE** @global_file to i64*), align 8
+ %call1 = call i64 @fwrite(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i64 3, i64 1, %struct._IO_FILE* %call)
+ %call2 = call i32 @fclose(%struct._IO_FILE* %call) #2
+ ret void
+}
+
+define void @test_captured_by_standard_stream(i8* nocapture readonly %s) {
+; CHECK-LABEL: @test_captured_by_standard_stream(
+; CHECK-NEXT: [[CALL:%.*]] = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[TMP:%.*]] = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
+; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @fputs(i8* [[S:%.*]], %struct._IO_FILE* [[TMP]])
+; CHECK-NEXT: [[CALL2:%.*]] = tail call i32 @fclose(%struct._IO_FILE* [[TMP]])
+; CHECK-NEXT: ret void
+;
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %tmp = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
+ %call1 = tail call i32 @fputs(i8* %s, %struct._IO_FILE* %tmp)
+ %call2 = tail call i32 @fclose(%struct._IO_FILE* %tmp)
+ ret void
+}
+
+define void @test_captured_by_arg(i8* nocapture readonly %s, %struct._IO_FILE* nocapture %file) {
+; CHECK-LABEL: @test_captured_by_arg(
+; CHECK-NEXT: [[CALL:%.*]] = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @fputs(i8* [[S:%.*]], %struct._IO_FILE* [[FILE:%.*]])
+; CHECK-NEXT: [[CALL2:%.*]] = tail call i32 @fclose(%struct._IO_FILE* [[FILE]])
+; CHECK-NEXT: ret void
+;
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
+ %call1 = tail call i32 @fputs(i8* %s, %struct._IO_FILE* %file)
+ %call2 = tail call i32 @fclose(%struct._IO_FILE* %file)
+ ret void
+}
+
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture)
+declare i64 @fread(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture)
+declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture)
+declare i8* @fgets(i8*, i32, %struct._IO_FILE* nocapture)
diff --git a/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll b/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll
new file mode 100644
index 00000000000..0eb729047e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unordered-fcmp-select.ll
@@ -0,0 +1,125 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @select_max_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_ugt(float %a, float %b) {
+ %cmp = fcmp ugt float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_max_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_uge(float %a, float %b) {
+ %cmp = fcmp uge float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_ugt(float %a, float %b) {
+ %cmp = fcmp ugt float %a, %b
+ %sel = select i1 %cmp, float %b, float %a
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_uge(float %a, float %b) {
+ %cmp = fcmp uge float %a, %b
+ %sel = select i1 %cmp, float %b, float %a
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_max_ult(float %a, float %b) {
+ %cmp = fcmp ult float %a, %b
+ %sel = select i1 %cmp, float %b, float %a
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK: ret float %sel
+define float @select_max_ule(float %a, float %b) {
+ %cmp = fcmp ule float %a, %b
+ %sel = select i1 %cmp, float %b, float %a
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ult(float %a, float %b) {
+ %cmp = fcmp ult float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ule(float %a, float %b) {
+ %cmp = fcmp ule float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_une(
+; CHECK: %cmp.inv = fcmp oeq float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_une(float %a, float %b) {
+ %cmp = fcmp une float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_ueq
+; CHECK: %cmp.inv = fcmp one float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_ueq(float %a, float %b) {
+ %cmp = fcmp ueq float %a, %b
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+declare void @foo(i1)
+
+; CHECK-LABEL: @select_max_ugt_2_use_cmp(
+; CHECK: fcmp ugt
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_max_ugt_2_use_cmp(float %a, float %b) {
+ %cmp = fcmp ugt float %a, %b
+ call void @foo(i1 %cmp)
+ %sel = select i1 %cmp, float %a, float %b
+ ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge_2_use_cmp(
+; CHECK: fcmp uge
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_min_uge_2_use_cmp(float %a, float %b) {
+ %cmp = fcmp uge float %a, %b
+ call void @foo(i1 %cmp)
+ %sel = select i1 %cmp, float %b, float %a
+ ret float %sel
+}
diff --git a/llvm/test/Transforms/InstCombine/unpack-fca.ll b/llvm/test/Transforms/InstCombine/unpack-fca.ll
new file mode 100644
index 00000000000..3c5e4177d69
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unpack-fca.ll
@@ -0,0 +1,239 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%A__vtbl = type { i8*, i32 (%A*)* }
+%A = type { %A__vtbl* }
+%B = type { i8*, i64 }
+
+@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
+
+declare i32 @A.foo(%A* nocapture %this)
+
+define void @storeA(%A* %a.ptr) {
+; CHECK-LABEL: storeA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store %A { %A__vtbl* @A__vtblZ }, %A* %a.ptr, align 8
+ ret void
+}
+
+define void @storeB(%B* %b.ptr) {
+; CHECK-LABEL: storeB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret void
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
+ ret void
+}
+
+define void @storeStructOfA({ %A }* %sa.ptr) {
+; CHECK-LABEL: storeStructOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
+ ret void
+}
+
+define void @storeArrayOfA([1 x %A]* %aa.ptr) {
+; CHECK-LABEL: storeArrayOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %aa.ptr, align 8
+ ret void
+}
+
+define void @storeLargeArrayOfA([2000 x %A]* %aa.ptr) {
+; CHECK-LABEL: storeLargeArrayOfA
+; CHECK-NEXT: store [2000 x %A]
+; CHECK-NEXT: ret void
+ %i1 = insertvalue [2000 x %A] undef, %A { %A__vtbl* @A__vtblZ }, 1
+ store [2000 x %A] %i1, [2000 x %A]* %aa.ptr, align 8
+ ret void
+}
+
+define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
+; CHECK-LABEL: storeStructOfArrayOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %saa.ptr, align 8
+ ret void
+}
+
+define void @storeArrayOfB([2 x %B]* %ab.ptr, [2 x %B] %ab) {
+; CHECK-LABEL: storeArrayOfB
+; CHECK-NEXT: [[EVB0:%[a-z0-9\.]+]] = extractvalue [2 x %B] %ab, 0
+; CHECK-NEXT: [[GEP0:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[EV0:%[a-z0-9\.]+]] = extractvalue %B [[EVB0]], 0
+; CHECK-NEXT: store i8* [[EV0]], i8** [[GEP0]], align 8
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 1
+; CHECK-NEXT: [[EV1:%[a-z0-9\.]+]] = extractvalue %B [[EVB0]], 1
+; CHECK-NEXT: store i64 [[EV1]], i64* [[GEP1]], align 8
+; CHECK-NEXT: [[EVB1:%[a-z0-9\.]+]] = extractvalue [2 x %B] %ab, 1
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 0
+; CHECK-NEXT: [[EV2:%[a-z0-9\.]+]] = extractvalue %B [[EVB1]], 0
+; CHECK-NEXT: store i8* [[EV2]], i8** [[GEP2]], align 8
+; CHECK-NEXT: [[GEP3:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 1
+; CHECK-NEXT: [[EV3:%[a-z0-9\.]+]] = extractvalue %B [[EVB1]], 1
+; CHECK-NEXT: store i64 [[EV3]], i64* [[GEP3]], align 8
+; CHECK-NEXT: ret void
+ store [2 x %B] %ab, [2 x %B]* %ab.ptr, align 8
+ ret void
+}
+
+define %A @loadA(%A* %a.ptr) {
+; CHECK-LABEL: loadA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: ret %A [[IV]]
+ %1 = load %A, %A* %a.ptr, align 8
+ ret %A %1
+}
+
+define %B @loadB(%B* %b.ptr) {
+; CHECK-LABEL: loadB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: ret %B [[IV2]]
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
+}
+
+define { %A } @loadStructOfA({ %A }* %sa.ptr) {
+; CHECK-LABEL: loadStructOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue { %A } undef, %A [[IV1]], 0
+; CHECK-NEXT: ret { %A } [[IV2]]
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
+}
+
+define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) {
+; CHECK-LABEL: loadArrayOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: ret [1 x %A] [[IV2]]
+ %1 = load [1 x %A], [1 x %A]* %aa.ptr, align 8
+ ret [1 x %A] %1
+}
+
+define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
+; CHECK-LABEL: loadStructOfArrayOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue { [1 x %A] } undef, [1 x %A] [[IV2]], 0
+; CHECK-NEXT: ret { [1 x %A] } [[IV3]]
+ %1 = load { [1 x %A] }, { [1 x %A] }* %saa.ptr, align 8
+ ret { [1 x %A] } %1
+}
+
+define { %A } @structOfA({ %A }* %sa.ptr) {
+; CHECK-LABEL: structOfA
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
+}
+
+define %B @structB(%B* %b.ptr) {
+; CHECK-LABEL: structB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret %B { i8* null, i64 42 }
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
+}
+
+define [2 x %B] @loadArrayOfB([2 x %B]* %ab.ptr) {
+; CHECK-LABEL: loadArrayOfB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue [2 x %B] undef, %B [[IV2]], 0
+; CHECK-NEXT: [[GEP3:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 0
+; CHECK-NEXT: [[LOAD3:%[a-z0-9\.]+]] = load i8*, i8** [[GEP3]], align 8
+; CHECK-NEXT: [[IV4:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD3]], 0
+; CHECK-NEXT: [[GEP4:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 1
+; CHECK-NEXT: [[LOAD4:%[a-z0-9\.]+]] = load i64, i64* [[GEP4]], align 8
+; CHECK-NEXT: [[IV5:%[a-z0-9\.]+]] = insertvalue %B [[IV4]], i64 [[LOAD4]], 1
+; CHECK-NEXT: [[IV6:%[a-z0-9\.]+]] = insertvalue [2 x %B] [[IV3]], %B [[IV5]], 1
+; CHECK-NEXT: ret [2 x %B] [[IV6]]
+ %1 = load [2 x %B], [2 x %B]* %ab.ptr, align 8
+ ret [2 x %B] %1
+}
+
+define [2000 x %B] @loadLargeArrayOfB([2000 x %B]* %ab.ptr) {
+; CHECK-LABEL: loadLargeArrayOfB
+; CHECK-NEXT: load [2000 x %B], [2000 x %B]* %ab.ptr, align 8
+; CHECK-NEXT: ret [2000 x %B]
+ %1 = load [2000 x %B], [2000 x %B]* %ab.ptr, align 8
+ ret [2000 x %B] %1
+}
+
+%struct.S = type <{ i8, %struct.T }>
+%struct.T = type { i32, i32 }
+
+; Make sure that we do not increase the alignment of a packed struct element
+define i32 @packed_alignment(%struct.S* dereferenceable(9) %s) {
+; CHECK-LABEL: packed_alignment
+; CHECK-NEXT: %tv.elt1 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 1, i32 1
+; CHECK-NEXT: %tv.unpack2 = load i32, i32* %tv.elt1, align 1
+; CHECK-NEXT: ret i32 %tv.unpack2
+ %t = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 1
+ %tv = load %struct.T, %struct.T* %t, align 1
+ %v = extractvalue %struct.T %tv, 1
+ ret i32 %v
+}
+
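Aside on the packed_alignment test above (illustrative only, not taken from the patch): a rough C analogue of %struct.S and %struct.T, assuming GCC/Clang's packed attribute. Because S is packed, T starts at byte offset 1, so loads of its i32 fields cannot be assumed to be more than 1-byte aligned, which is why the CHECK lines keep align 1.

    #include <stddef.h>

    struct T { int a, b; };
    struct S { char c; struct T t; } __attribute__((packed));

    /* T lands at offset 1 inside the packed S, so t.a and t.b are misaligned
       and any load of them may only assume 1-byte alignment. */
    _Static_assert(offsetof(struct S, t) == 1, "T should start at byte 1 of packed S");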
+%struct.U = type {i8, i8, i8, i8, i8, i8, i8, i8, i64}
+
+define void @check_alignment(%struct.U* %u, %struct.U* %v) {
+; CHECK-LABEL: check_alignment
+; CHECK: load i8, i8* {{.*}}, align 8
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 2
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 4
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 2
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i64, i64* {{.*}}, align 8
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 8
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 2
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 4
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 2
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i64 {{.*}}, i64* {{.*}}, align 8
+ %1 = load %struct.U, %struct.U* %u
+ store %struct.U %1, %struct.U* %v
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll b/llvm/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll
new file mode 100644
index 00000000000..dcd046e6760
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unrecognized_three-way-comparison.ll
@@ -0,0 +1,459 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare void @foo(i32 %x)
+
+define i32 @compare_against_arbitrary_value(i32 %x, i32 %c) {
+; CHECK-LABEL: @compare_against_arbitrary_value(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, %c
+ %cmp2 = icmp slt i32 %x, %c
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero(i32 %x) {
+; CHECK-LABEL: @compare_against_zero(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 0
+ %cmp2 = icmp slt i32 %x, 0
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_one(i32 %x) {
+; CHECK-LABEL: @compare_against_one(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 1
+ %cmp2 = icmp slt i32 %x, 1
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_two(i32 %x) {
+; CHECK-LABEL: @compare_against_two(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 2
+ %cmp2 = icmp slt i32 %x, 2
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_three(i32 %x) {
+; CHECK-LABEL: @compare_against_three(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 3
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 3
+ %cmp2 = icmp slt i32 %x, 3
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_four(i32 %x) {
+; CHECK-LABEL: @compare_against_four(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 4
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 4
+ %cmp2 = icmp slt i32 %x, 4
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_five(i32 %x) {
+; CHECK-LABEL: @compare_against_five(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 5
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 5
+ %cmp2 = icmp slt i32 %x, 5
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_six(i32 %x) {
+; CHECK-LABEL: @compare_against_six(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 6
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 6
+ %cmp2 = icmp slt i32 %x, 6
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
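Aside: the tests above all follow one shape. A hypothetical C rendering of the idiom (the function names and the -1/0/1 constants here are illustrative assumptions, not taken from the patch): a three-way comparator built from two selects whose result is only checked against zero, which the CHECK lines show collapsing into a single icmp sgt.

    extern void foo(int x);

    /* Three-way comparison built the same way as the two selects in the IR above. */
    static int cmp3(int x, int c) {
        return x == c ? 0 : (x < c ? -1 : 1);
    }

    void caller(int x, int c) {
        int r = cmp3(x, c);
        if (r > 0)   /* only the sign of r matters here...                        */
            foo(r);  /* ...so this becomes "if (x > c) foo(1);" after the fold    */
    }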
+; Same as @compare_against_arbitrary_value, but here the three-way comparison
+; does not return the idiomatic comparator result (-1, 0, 1); it returns other constants.
+define i32 @compare_against_arbitrary_value_non_idiomatic_1(i32 %x, i32 %c) {
+; CHECK-LABEL: @compare_against_arbitrary_value_non_idiomatic_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 425)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, %c
+ %cmp2 = icmp slt i32 %x, %c
+ %select1 = select i1 %cmp2, i32 -6, i32 425
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero_non_idiomatic_add(i32 %x) {
+; CHECK-LABEL: @compare_against_zero_non_idiomatic_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 425)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 0
+ %cmp2 = icmp slt i32 %x, 0
+ %select1 = select i1 %cmp2, i32 -6, i32 425
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+; Same as @compare_against_arbitrary_value, but now the three-way comparison
+; returns constants other than the idiomatic comparator results (-1, 0, 1).
+define i32 @compare_against_arbitrary_value_non_idiomatic_2(i32 %x, i32 %c) {
+; CHECK-LABEL: @compare_against_arbitrary_value_non_idiomatic_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 425)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, %c
+ %cmp2 = icmp slt i32 %x, %c
+ %select1 = select i1 %cmp2, i32 -5, i32 425
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero_non_idiomatic_or(i32 %x) {
+; CHECK-LABEL: @compare_against_zero_non_idiomatic_or(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 425)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i32 %x, 0
+ %cmp2 = icmp slt i32 %x, 0
+ %select1 = select i1 %cmp2, i32 -5, i32 425
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_arbitrary_value_type_mismatch(i64 %x, i64 %c) {
+; CHECK-LABEL: @compare_against_arbitrary_value_type_mismatch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i64 %x, %c
+ %cmp2 = icmp slt i64 %x, %c
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero_type_mismatch_idiomatic(i64 %x) {
+; CHECK-LABEL: @compare_against_zero_type_mismatch_idiomatic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i64 %x, 0
+ %cmp2 = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp2, i32 -1, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero_type_mismatch_non_idiomatic_1(i64 %x) {
+; CHECK-LABEL: @compare_against_zero_type_mismatch_non_idiomatic_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i64 %x, 0
+ %cmp2 = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp2, i32 -7, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
+define i32 @compare_against_zero_type_mismatch_non_idiomatic_2(i64 %x) {
+; CHECK-LABEL: @compare_against_zero_type_mismatch_non_idiomatic_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[X:%.*]], 0
+; CHECK-NEXT: br i1 [[TMP0]], label [[CALLFOO:%.*]], label [[EXIT:%.*]]
+; CHECK: callfoo:
+; CHECK-NEXT: call void @foo(i32 1)
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 42
+;
+
+entry:
+ %cmp1 = icmp eq i64 %x, 0
+ %cmp2 = icmp slt i64 %x, 0
+ %select1 = select i1 %cmp2, i32 -6, i32 1
+ %select2 = select i1 %cmp1, i32 0, i32 %select1
+ %cond = icmp sgt i32 %select2, 0
+ br i1 %cond, label %callfoo, label %exit
+
+callfoo:
+ call void @foo(i32 %select2)
+ br label %exit
+
+exit:
+ ret i32 42
+}
+
diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
new file mode 100644
index 00000000000..44aa7deb4ac
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Canonicalization of unsigned saturated subtraction idioms to
+; usub.sat() intrinsics is tested here.
+
+declare void @use(i64)
+
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
+
+define i64 @max_sub_ugt(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_ugt(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %cmp = icmp ugt i64 %a, %b
+ %sub = sub i64 %a, %b
+ %sel = select i1 %cmp, i64 %sub, i64 0
+ ret i64 %sel
+}
+
+; (a >= b) ? a - b : 0 -> usub.sat(a, b)
+
+define i64 @max_sub_uge(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_uge(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %cmp = icmp uge i64 %a, %b
+ %sub = sub i64 %a, %b
+ %sel = select i1 %cmp, i64 %sub, i64 0
+ ret i64 %sel
+}
+
+; Again, with vectors:
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
+
+define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @max_sub_ugt_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %cmp = icmp ugt <4 x i32> %a, %b
+ %sub = sub <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer
+ ret <4 x i32> %sel
+}
+
+; Use extra ops to thwart icmp swapping canonicalization.
+; (b < a) ? a - b : 0 -> usub.sat(a, b)
+
+define i64 @max_sub_ult(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_ult(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
+; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %cmp = icmp ult i64 %b, %a
+ %sub = sub i64 %a, %b
+ %sel = select i1 %cmp, i64 %sub, i64 0
+ %extrasub = sub i64 %b, %a
+ call void @use(i64 %extrasub)
+ ret i64 %sel
+}
+
+; (b > a) ? 0 : a - b -> usub.sat(a, b)
+
+define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_ugt_sel_swapped(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
+; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %cmp = icmp ugt i64 %b, %a
+ %sub = sub i64 %a, %b
+ %sel = select i1 %cmp, i64 0, i64 %sub
+ %extrasub = sub i64 %b, %a
+ call void @use(i64 %extrasub)
+ ret i64 %sel
+}
+
+; (a < b) ? 0 : a - b -> usub.sat(a, b)
+
+define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_ult_sel_swapped(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %cmp = icmp ult i64 %a, %b
+ %sub = sub i64 %a, %b
+ %sel = select i1 %cmp, i64 0, i64 %sub
+ ret i64 %sel
+}
+
+; ((a > b) ? b - a : 0) -> -usub.sat(a, b)
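+; When a > b, the wrapped value (b - a) equals -(a - b), so these negated forms
+; are expected to canonicalize to the negation of usub.sat(a, b), which shows up
+; as the extra 'sub i64 0, ...' in the checks below.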
+
+define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
+; CHECK-LABEL: @neg_max_sub_ugt(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
+; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %cmp = icmp ugt i64 %a, %b
+ %sub = sub i64 %b, %a
+ %sel = select i1 %cmp, i64 %sub, i64 0
+ %extrasub = sub i64 %a, %b
+ call void @use(i64 %extrasub)
+ ret i64 %sel
+}
+
+; ((b < a) ? b - a : 0) -> -usub.sat(a, b)
+
+define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
+; CHECK-LABEL: @neg_max_sub_ult(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %cmp = icmp ult i64 %b, %a
+ %sub = sub i64 %b, %a
+ %sel = select i1 %cmp, i64 %sub, i64 0
+ ret i64 %sel
+}
+
+; ((b > a) ? 0 : b - a) -> -usub.sat(a, b)
+
+define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
+; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %cmp = icmp ugt i64 %b, %a
+ %sub = sub i64 %b, %a
+ %sel = select i1 %cmp, i64 0, i64 %sub
+ ret i64 %sel
+}
+
+; ((a < b) ? 0 : b - a) -> -usub.sat(a, b)
+
+define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
+; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
+; CHECK-NEXT: call void @use(i64 [[EXTRASUB]])
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %cmp = icmp ult i64 %a, %b
+ %sub = sub i64 %b, %a
+ %sel = select i1 %cmp, i64 0, i64 %sub
+ %extrasub = sub i64 %a, %b
+ call void @use(i64 %extrasub)
+ ret i64 %sel
+}
+
diff --git a/llvm/test/Transforms/InstCombine/urem-simplify-bug.ll b/llvm/test/Transforms/InstCombine/urem-simplify-bug.ll
new file mode 100644
index 00000000000..4f18f359854
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/urem-simplify-bug.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str = internal constant [5 x i8] c"foo\0A\00"
+@.str1 = internal constant [5 x i8] c"bar\0A\00"
+
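+; %tmp3 = (%x | -5) always has the bits of -5 set, so as an unsigned value it is
+; larger than 251 and '251 urem %tmp3' is just 251. From there the whole chain
+; folds and the branch condition should become the constant 'false' (see the
+; CHECK lines below).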
+define i32 @main() nounwind {
+entry:
+ %x = call i32 @func_11() nounwind
+ %tmp3 = or i32 %x, -5
+ %tmp5 = urem i32 251, %tmp3
+ %tmp6 = icmp ne i32 %tmp5, 0
+ %tmp67 = zext i1 %tmp6 to i32
+ %tmp9 = urem i32 %tmp67, 95
+ %tmp10 = and i32 %tmp9, 1
+ %tmp12 = icmp eq i32 %tmp10, 0
+ br i1 %tmp12, label %bb14, label %bb
+
+bb:
+ br label %bb15
+
+bb14:
+ br label %bb15
+
+bb15:
+ %iftmp.0.0 = phi i8* [ getelementptr ([5 x i8], [5 x i8]* @.str1, i32 0, i32 0), %bb14 ], [ getelementptr ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), %bb ]
+ %tmp17 = call i32 (i8*, ...) @printf(i8* %iftmp.0.0) nounwind
+ ret i32 0
+}
+
+; CHECK-LABEL: define i32 @main(
+; CHECK: call i32 @func_11()
+; CHECK-NEXT: br i1 false, label %bb14, label %bb
+
+declare i32 @func_11()
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/llvm/test/Transforms/InstCombine/vararg.ll b/llvm/test/Transforms/InstCombine/vararg.ll
new file mode 100644
index 00000000000..111cb4de7bc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vararg.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+
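+; The va_list objects are started, copied and ended without any argument ever
+; being read, so all of the va_* and lifetime calls are dead and the whole body
+; should fold to a bare 'ret i32 0'.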
+define i32 @func(i8* nocapture readnone %fmt, ...) {
+; CHECK-LABEL: @func(
+; CHECK: entry:
+; CHECK-NEXT: ret i32 0
+entry:
+ %va0 = alloca %struct.__va_list, align 8
+ %va1 = alloca %struct.__va_list, align 8
+ %0 = bitcast %struct.__va_list* %va0 to i8*
+ %1 = bitcast %struct.__va_list* %va1 to i8*
+ call void @llvm.lifetime.start.p0i8(i64 32, i8* %0)
+ call void @llvm.va_start(i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 32, i8* %1)
+ call void @llvm.va_copy(i8* %1, i8* %0)
+ call void @llvm.va_end(i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 32, i8* %1)
+ call void @llvm.va_end(i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 32, i8* %0)
+ ret i32 0
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vec-binop-select.ll b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
new file mode 100644
index 00000000000..abf4729e369
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
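+; Each pair of shuffles below uses complementary "select" masks: lane i of each
+; result is taken from lane i of one of the two inputs (mask element i or i + 4).
+; Together the shuffles merely redistribute the lanes of %x and %y, so for
+; commutative binops the whole pattern should fold to the binop on %x and %y.
+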
+; Non-canonical mask
+
+define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @and(
+; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+ %r = and <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
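+; Only lane 0 of the <16 x i32> loop recurrence is ever read, so the vector phi
+; is expected to be scalarized into an i32 phi fed by a trunc of %val.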
+
+define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @or(
+; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %r = or <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Non-canonical masks
+
+define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @xor(
+; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = xor <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Flags
+
+define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add(
+; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ %r = add nsw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Negative test - wrong operand
+
+define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: @add_wrong_op(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ %r = add nsw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_non_select_mask(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+ %r = add nsw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_masks_with_undefs(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+ %r = add nsw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+; Non-commutative opcode
+
+define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @sub(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[R:%.*]] = sub <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %r = sub <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @mul(
+; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %r = mul nuw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @sdiv(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @sdiv(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[R:%.*]] = sdiv <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %r = sdiv <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @udiv(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @udiv(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[R:%.*]] = udiv <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %r = udiv <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @srem(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @srem(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[R:%.*]] = srem <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %r = srem <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @urem(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @urem(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[R:%.*]] = urem <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ %r = urem <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shl(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[R:%.*]] = shl nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ %r = shl nsw <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @lshr(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[R:%.*]] = lshr exact <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ %r = lshr exact <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @ashr(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = ashr <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = ashr <4 x i32> %sel1, %sel2
+ ret <4 x i32> %r
+}
+
+define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @fadd(
+; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = fadd <4 x float> %sel1, %sel2
+ ret <4 x float> %r
+}
+
+define <4 x float> @fsub(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @fsub(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = fsub fast <4 x float> %sel1, %sel2
+ ret <4 x float> %r
+}
+
+define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: @fmul(
+; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x double> %y, <4 x double> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = fmul nnan <4 x double> %sel1, %sel2
+ ret <4 x double> %r
+}
+
+define <4 x double> @fdiv(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: @fdiv(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = fdiv nnan arcp <4 x double> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x double> %y, <4 x double> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = fdiv arcp nnan <4 x double> %sel1, %sel2
+ ret <4 x double> %r
+}
+
+define <4 x double> @frem(<4 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: @frem(
+; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[R:%.*]] = frem <4 x double> [[SEL1]], [[SEL2]]
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %sel2 = shufflevector <4 x double> %y, <4 x double> %x, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ %r = frem <4 x double> %sel1, %sel2
+ ret <4 x double> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
new file mode 100644
index 00000000000..a56152cca59
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -0,0 +1,640 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
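+; Only element 0 of the bitcast vector is extracted, so the insertelement chain
+; and the vector bitcast should shrink to a scalar bitcast of the fmul result.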
+define i32 @test2(float %f) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[F:%.*]], [[F]]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP5]] to i32
+; CHECK-NEXT: ret i32 [[TMP21]]
+;
+ %tmp5 = fmul float %f, %f
+ %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+ %tmp19 = bitcast <4 x float> %tmp12 to <4 x i32>
+ %tmp21 = extractelement <4 x i32> %tmp19, i32 0
+ ret i32 %tmp21
+}
+
+define void @get_image() nounwind {
+; CHECK-LABEL: @get_image(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @fgetc(i8* null) #0
+; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: unreachable
+;
+entry:
+ %0 = call i32 @fgetc(i8* null) nounwind ; <i32> [#uses=1]
+ %1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
+ %tmp2 = insertelement <100 x i8> zeroinitializer, i8 %1, i32 1 ; <<100 x i8>> [#uses=1]
+ %tmp1 = extractelement <100 x i8> %tmp2, i32 0 ; <i8> [#uses=1]
+ %2 = icmp eq i8 %tmp1, 80 ; <i1> [#uses=1]
+ br i1 %2, label %bb2, label %bb3
+
+bb2: ; preds = %entry
+ br label %bb3
+
+bb3: ; preds = %bb2, %entry
+ unreachable
+}
+
+; PR4340
+define void @vac(<4 x float>* nocapture %a) nounwind {
+; CHECK-LABEL: @vac(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp1 = load <4 x float>, <4 x float>* %a ; <<4 x float>> [#uses=1]
+ %vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0 ; <<4 x float>> [#uses=1]
+ %vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
+ %vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
+ %vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
+ store <4 x float> %vecins8, <4 x float>* %a
+ ret void
+}
+
+declare i32 @fgetc(i8*)
+
+define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
+; CHECK-LABEL: @dead_shuffle_elt(
+; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]]
+;
+ %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %shuffle9.i = shufflevector <4 x float> %x, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x float> %shuffle9.i
+}
+
+define <2 x float> @test_fptrunc(double %f) {
+; CHECK-LABEL: @test_fptrunc(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[F:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TMP2]]
+;
+ %tmp9 = insertelement <4 x double> undef, double %f, i32 0
+ %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
+ %tmp11 = insertelement <4 x double> %tmp10, double 0.000000e+00, i32 2
+ %tmp12 = insertelement <4 x double> %tmp11, double 0.000000e+00, i32 3
+ %tmp5 = fptrunc <4 x double> %tmp12 to <4 x float>
+ %ret = shufflevector <4 x float> %tmp5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x float> %ret
+}
+
+define <2 x double> @test_fpext(float %f) {
+; CHECK-LABEL: @test_fpext(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float [[F:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[TMP2]]
+;
+ %tmp9 = insertelement <4 x float> undef, float %f, i32 0
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+ %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+ %tmp5 = fpext <4 x float> %tmp12 to <4 x double>
+ %ret = shufflevector <4 x double> %tmp5, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %ret
+}
+
+define <4 x double> @test_shuffle(<4 x double> %f) {
+; CHECK-LABEL: @test_shuffle(
+; CHECK-NEXT: [[RET1:%.*]] = insertelement <4 x double> [[F:%.*]], double 1.000000e+00, i32 3
+; CHECK-NEXT: ret <4 x double> [[RET1]]
+;
+ %ret = shufflevector <4 x double> %f, <4 x double> <double undef, double 1.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ ret <4 x double> %ret
+}
+
+define <4 x float> @test_select(float %f, float %g) {
+; CHECK-LABEL: @test_select(
+; CHECK-NEXT: [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float [[F:%.*]], i32 0
+; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x float> [[RET]]
+;
+ %a0 = insertelement <4 x float> undef, float %f, i32 0
+ %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
+ %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
+ %a3 = insertelement <4 x float> %a2, float 3.000000e+00, i32 3
+ %b0 = insertelement <4 x float> undef, float %g, i32 0
+ %b1 = insertelement <4 x float> %b0, float 4.000000e+00, i32 1
+ %b2 = insertelement <4 x float> %b1, float 5.000000e+00, i32 2
+ %b3 = insertelement <4 x float> %b2, float 6.000000e+00, i32 3
+ %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %b3
+ ret <4 x float> %ret
+}
+
+; Check that instcombine doesn't wrongly fold away the select completely.
+
+define <2 x i64> @PR24922(<2 x i64> %v) {
+; CHECK-LABEL: @PR24922(
+; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x i64> [[V:%.*]], i64 0, i32 0
+; CHECK-NEXT: ret <2 x i64> [[RESULT1]]
+;
+ %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
+ ret <2 x i64> %result
+}
+
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+
+define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
+; CHECK-LABEL: @inselt_shuf_no_demand(
+; CHECK-NEXT: ret <4 x float> undef
+;
+ %out1 = insertelement <4 x float> undef, float %a1, i32 1
+ %out12 = insertelement <4 x float> %out1, float %a2, i32 2
+ %out123 = insertelement <4 x float> %out12, float %a3, i32 3
+ %shuffle = shufflevector <4 x float> %out123, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ ret <4 x float> %shuffle
+}
+
+; The shuffle only demands the 0th (undef) element of 'out123', so everything should fold away.
+
+define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
+; CHECK-LABEL: @inselt_shuf_no_demand_commute(
+; CHECK-NEXT: ret <4 x float> undef
+;
+ %out1 = insertelement <4 x float> undef, float %a1, i32 1
+ %out12 = insertelement <4 x float> %out1, float %a2, i32 2
+ %out123 = insertelement <4 x float> %out12, float %a3, i32 3
+ %shuffle = shufflevector <4 x float> undef, <4 x float> %out123, <4 x i32> <i32 4, i32 undef, i32 undef, i32 undef>
+ ret <4 x float> %shuffle
+}
+
+; The add uses 'out012', giving it multiple uses after the shuffle is transformed
+; to also use 'out012'. The analysis should be able to see past that.
+
+define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) {
+; CHECK-LABEL: @inselt_shuf_no_demand_multiuse(
+; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0
+; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i32 1
+; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[FOO]]
+;
+ %out0 = insertelement <4 x i32> undef, i32 %a0, i32 0
+ %out01 = insertelement <4 x i32> %out0, i32 %a1, i32 1
+ %out012 = insertelement <4 x i32> %out01, i32 %a0, i32 2
+ %foo = add <4 x i32> %out012, %b
+ %out0123 = insertelement <4 x i32> %foo, i32 %a1, i32 3
+ %shuffle = shufflevector <4 x i32> %out0123, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) {
+; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain(
+; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> undef, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]]
+; CHECK-NEXT: ret <4 x float> [[OUT12]]
+;
+ %out1 = insertelement <4 x float> undef, float %a1, i32 1
+ %out12 = insertelement <4 x float> %out1, float %a2, i32 %variable_index ; something unexpected
+ %out123 = insertelement <4 x float> %out12, float %a3, i32 3
+ %shuffle = shufflevector <4 x float> %out123, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ ret <4 x float> %shuffle
+}
+
+; Test undef replacement in constant vector elements with binops.
+
+define <3 x i8> @shuf_add(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_add(
+; CHECK-NEXT: [[BO:%.*]] = add <3 x i8> [[X:%.*]], <i8 undef, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = add nsw <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_sub(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_sub(
+; CHECK-NEXT: [[BO:%.*]] = sub <3 x i8> <i8 1, i8 undef, i8 3>, [[X:%.*]]
+; CHECK-NEXT: ret <3 x i8> [[BO]]
+;
+ %bo = sub nuw <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 2>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_mul(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_mul(
+; CHECK-NEXT: [[BO:%.*]] = mul <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = mul nsw <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_and(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_and(
+; CHECK-NEXT: [[BO:%.*]] = and <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = and <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_or(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_or(
+; CHECK-NEXT: [[BO:%.*]] = or <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = or <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_xor(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_xor(
+; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 undef, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = xor <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_lshr_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = lshr <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_lshr_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = lshr exact <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = lshr exact <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_ashr_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = ashr <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_ashr_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = ashr exact <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = ashr exact <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_shl_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = shl nsw <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = shl nsw <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_shl_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = shl nuw <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = shl nuw <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_sdiv_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = sdiv exact <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = sdiv exact <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 1>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_sdiv_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = sdiv <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_srem_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = srem <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_srem_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 1>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = srem <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 1>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_udiv_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = udiv exact <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = udiv exact <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_udiv_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = udiv <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = udiv <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 undef, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_urem_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> <i8 1, i8 2, i8 3>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = urem <3 x i8> <i8 1, i8 2, i8 3>, %x
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 2, i32 1, i32 undef>
+ ret <3 x i8> %r
+}
+
+define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) {
+; CHECK-LABEL: @shuf_urem_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> [[X:%.*]], <i8 1, i8 2, i8 3>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+; CHECK-NEXT: ret <3 x i8> [[R]]
+;
+ %bo = urem <3 x i8> %x, <i8 1, i8 2, i8 3>
+ %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+ ret <3 x i8> %r
+}
+
+define <3 x float> @shuf_fadd(<3 x float> %x) {
+; CHECK-LABEL: @shuf_fadd(
+; CHECK-NEXT: [[BO:%.*]] = fadd <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = fadd <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_fsub(<3 x float> %x) {
+; CHECK-LABEL: @shuf_fsub(
+; CHECK-NEXT: [[BO:%.*]] = fsub fast <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = fsub fast <3 x float> <float 1.0, float 2.0, float 3.0>, %x
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_fmul(<3 x float> %x) {
+; CHECK-LABEL: @shuf_fmul(
+; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = fmul reassoc <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) {
+; CHECK-LABEL: @shuf_fdiv_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = fdiv reassoc ninf <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = fdiv ninf reassoc <3 x float> <float 1.0, float 2.0, float 3.0>, %x
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 0, i32 2>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) {
+; CHECK-LABEL: @shuf_fdiv_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = fdiv nnan ninf <3 x float> [[X:%.*]], <float 1.000000e+00, float 2.000000e+00, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = fdiv ninf nnan <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 1, i32 0>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_frem_const_op0(<3 x float> %x) {
+; CHECK-LABEL: @shuf_frem_const_op0(
+; CHECK-NEXT: [[BO:%.*]] = frem nnan <3 x float> <float 1.000000e+00, float undef, float 3.000000e+00>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 undef, i32 2, i32 0>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = frem nnan <3 x float> <float 1.0, float 2.0, float 3.0>, %x
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 undef, i32 2, i32 0>
+ ret <3 x float> %r
+}
+
+define <3 x float> @shuf_frem_const_op1(<3 x float> %x) {
+; CHECK-LABEL: @shuf_frem_const_op1(
+; CHECK-NEXT: [[BO:%.*]] = frem reassoc ninf <3 x float> [[X:%.*]], <float undef, float 2.000000e+00, float 3.000000e+00>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+; CHECK-NEXT: ret <3 x float> [[R]]
+;
+ %bo = frem ninf reassoc <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+ %r = shufflevector <3 x float> %bo, <3 x float> undef, <3 x i32> <i32 1, i32 undef, i32 2>
+ ret <3 x float> %r
+}
+
+;; TODO: getelementptr tests below show missing simplifications for
+;; vector demanded elements on vector geps.
+
+define i32* @gep_vbase_w_s_idx(<2 x i32*> %base) {
+; CHECK-LABEL: @gep_vbase_w_s_idx(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %gep = getelementptr i32, <2 x i32*> %base, i64 1
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define i32* @gep_splat_base_w_s_idx(i32* %base) {
+; CHECK-LABEL: @gep_splat_base_w_s_idx(
+; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
+ %gep = getelementptr i32, <2 x i32*> %basevec2, i64 1
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+
+define i32* @gep_splat_base_w_cv_idx(i32* %base) {
+; CHECK-LABEL: @gep_splat_base_w_cv_idx(
+; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> <i64 undef, i64 1>
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
+ %gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> <i64 0, i64 1>
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) {
+; CHECK-LABEL: @gep_splat_base_w_vidx(
+; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
+ %gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> %idxvec
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+
+@GLOBAL = internal global i32 zeroinitializer
+
+define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
+; CHECK-LABEL: @gep_cvbase_w_s_idx(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* undef, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, i64 %raw_addr
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) {
+; CHECK-LABEL: @gep_cvbase_w_cv_idx(
+; CHECK-NEXT: ret i32* extractelement (<2 x i32*> getelementptr (i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, <2 x i64> <i64 0, i64 1>), i32 1)
+;
+ %gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, <2 x i64> <i64 0, i64 1>
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+
+define i32* @gep_sbase_w_cv_idx(i32* %base) {
+; CHECK-LABEL: @gep_sbase_w_cv_idx(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 undef, i64 1>
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %gep = getelementptr i32, i32* %base, <2 x i64> <i64 0, i64 1>
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) {
+; CHECK-LABEL: @gep_sbase_w_splat_idx(
+; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]]
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0
+ %idxvec2 = shufflevector <2 x i64> %idxvec1, <2 x i64> undef, <2 x i32> zeroinitializer
+ %gep = getelementptr i32, i32* %base, <2 x i64> %idxvec2
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define i32* @gep_splat_both(i32* %base, i64 %idx) {
+; CHECK-LABEL: @gep_splat_both(
+; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
+; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT: ret i32* [[EE]]
+;
+ %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %basevec2 = shufflevector <2 x i32*> %basevec1, <2 x i32*> undef, <2 x i32> zeroinitializer
+ %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0
+ %idxvec2 = shufflevector <2 x i64> %idxvec1, <2 x i64> undef, <2 x i32> zeroinitializer
+ %gep = getelementptr i32, <2 x i32*> %basevec2, <2 x i64> %idxvec2
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
+
+define <2 x i32*> @gep_all_lanes_undef(i32* %base, i64 %idx) {
+; CHECK-LABEL: @gep_all_lanes_undef(
+; CHECK-NEXT: ret <2 x i32*> undef
+;
+ %basevec = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %idxvec = insertelement <2 x i64> undef, i64 %idx, i32 1
+ %gep = getelementptr i32, <2 x i32*> %basevec, <2 x i64> %idxvec
+ ret <2 x i32*> %gep
+}
+
+define i32* @gep_demanded_lane_undef(i32* %base, i64 %idx) {
+; CHECK-LABEL: @gep_demanded_lane_undef(
+; CHECK-NEXT: ret i32* undef
+;
+ %basevec = insertelement <2 x i32*> undef, i32* %base, i32 0
+ %idxvec = insertelement <2 x i64> undef, i64 %idx, i32 1
+ %gep = getelementptr i32, <2 x i32*> %basevec, <2 x i64> %idxvec
+ %ee = extractelement <2 x i32*> %gep, i32 1
+ ret i32* %ee
+}
diff --git a/llvm/test/Transforms/InstCombine/vec_extract_2elts.ll b/llvm/test/Transforms/InstCombine/vec_extract_2elts.ll
new file mode 100644
index 00000000000..5972340d60a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_extract_2elts.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
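+; Only element 0 of the widened vector is used, so the vector zext should be
+; replaced with an extract of the source lane followed by a scalar zext.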
+define void @test(<4 x i32> %v, i64 *%r1, i64 *%r2) {
+; CHECK: %1 = extractelement <4 x i32> %v, i32 0
+; CHECK: %2 = zext i32 %1 to i64
+ %1 = zext <4 x i32> %v to <4 x i64>
+ %2 = extractelement <4 x i64> %1, i32 0
+ store i64 %2, i64 *%r1
+ store i64 %2, i64 *%r2
+ ret void
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
new file mode 100644
index 00000000000..5c0610ff48b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
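+; Only one (variable-index) lane of the converted vector is used, so the vector
+; fptosi should be scalarized: extract the demanded float lane first, then
+; convert it as a scalar, which is what the check order below implies.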
+define void @test (float %b, <8 x float> * %p) {
+; CHECK: extractelement
+; CHECK: fptosi
+ %1 = load <8 x float> , <8 x float> * %p
+ %2 = bitcast <8 x float> %1 to <8 x i32>
+ %3 = bitcast <8 x i32> %2 to <8 x float>
+ %a = fptosi <8 x float> %3 to <8 x i32>
+ %4 = fptosi float %b to i32
+ %5 = add i32 %4, -2
+ %6 = extractelement <8 x i32> %a, i32 %5
+ %7 = insertelement <8 x i32> undef, i32 %6, i32 7
+ %8 = sitofp <8 x i32> %7 to <8 x float>
+ store <8 x float> %8, <8 x float>* %p
+ ret void
+}
+
+; PR18600
+define i32 @test2(i32 %i) {
+ %e = extractelement <4 x i32> bitcast (<2 x i64> <i64 1, i64 2> to <4 x i32>), i32 %i
+ ret i32 %e
+
+; CHECK-LABEL: @test2
+; CHECK: extractelement
+}
diff --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
new file mode 100644
index 00000000000..33ed7cb6a7f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define <4 x i16*> @PR41270([4 x i16]* %x) {
+; CHECK-LABEL: @PR41270(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3
+; CHECK-NEXT: ret <4 x i16*> [[TMP2]]
+;
+ %ins = insertelement <4 x [4 x i16]*> undef, [4 x i16]* %x, i32 0
+ %splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> undef, <4 x i32> zeroinitializer
+ %t2 = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> %splat, i32 0, i32 3
+ %t3 = extractelement <4 x i16*> %t2, i32 3
+ %ins2 = insertelement <4 x i16*> undef, i16* %t3, i32 0
+ ret <4 x i16*> %ins2
+}
diff --git a/llvm/test/Transforms/InstCombine/vec_insertelt.ll b/llvm/test/Transforms/InstCombine/vec_insertelt.ll
new file mode 100644
index 00000000000..3b949209c4d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_insertelt.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: ret <4 x i32> %A
+
+; PR1286
+define <4 x i32> @test1(<4 x i32> %A) {
+ %B = insertelement <4 x i32> %A, i32 undef, i32 1
+ ret <4 x i32> %B
+}
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
new file mode 100644
index 00000000000..15eb94aad69
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
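+; These tests exercise scalarization of a vector phi: when every extract from
+; the phi reads the same constant lane, the extraction is pushed into the
+; incoming values and the loop carries a scalar phi instead (see @nocopy for
+; the case where differing lanes block the transform).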
+define void @f(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @f
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
+entry:
+ %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+ %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+ %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+ %1 = trunc <16 x i64> %0 to <16 x i32>
+ br label %loop
+
+loop:
+ %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+ %elt = extractelement <16 x i32> %2, i32 0
+ %end = icmp ult i32 %elt, %limit
+ %3 = add i32 10, %elt
+ %4 = sext i32 %elt to i64
+ %5 = getelementptr i32, i32* %ptr, i64 %4
+ store i32 %3, i32* %5
+ %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ br i1 %end, label %loop, label %ret
+
+ret:
+ ret void
+}
+
+define void @copy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @copy
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
+entry:
+ %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+ %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+ %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+ %1 = trunc <16 x i64> %0 to <16 x i32>
+ br label %loop
+
+loop:
+ %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+ %elt = extractelement <16 x i32> %2, i32 0
+ %eltcopy = extractelement <16 x i32> %2, i32 0
+ %end = icmp ult i32 %elt, %limit
+ %3 = add i32 10, %eltcopy
+ %4 = sext i32 %elt to i64
+ %5 = getelementptr i32, i32* %ptr, i64 %4
+ store i32 %3, i32* %5
+ %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ br i1 %end, label %loop, label %ret
+
+ret:
+ ret void
+}
+
+define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @nocopy
+; CHECK-NOT: phi i32
+; CHECK: phi <16 x i32> [ %3, %entry ], [ %inc, %loop ]
+entry:
+ %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+ %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+ %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+ %1 = trunc <16 x i64> %0 to <16 x i32>
+ br label %loop
+
+loop:
+ %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+ %elt = extractelement <16 x i32> %2, i32 0
+ %eltcopy = extractelement <16 x i32> %2, i32 1
+ %end = icmp ult i32 %elt, %limit
+ %3 = add i32 10, %eltcopy
+ %4 = sext i32 %elt to i64
+ %5 = getelementptr i32, i32* %ptr, i64 %4
+ store i32 %3, i32* %5
+ %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ br i1 %end, label %loop, label %ret
+
+ret:
+ ret void
+}
+
+define i1 @g(<3 x i32> %input_2) {
+; CHECK-LABEL: @g
+; CHECK: extractelement <3 x i32> %input_2, i32 0
+entry:
+ br label %for.cond
+
+for.cond:
+ %input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
+ %input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
+ br i1 undef, label %for.end, label %for.body
+
+; CHECK-NOT: extractelement <3 x i32> %{{.*}}, i32 0
+for.body:
+ %dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
+ %sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
+ %div45 = sdiv <3 x i32> %input_2.addr.0, %sub44
+ br label %for.cond
+
+for.end:
+ %0 = extractelement <3 x i32> %input_2.addr.0, i32 0
+ %.89 = select i1 false, i32 0, i32 %0
+ %tobool313 = icmp eq i32 %.89, 0
+ ret i1 %tobool313
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
new file mode 100644
index 00000000000..39bd4087416
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
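+; The tests below start from the open-coded bit-select idiom
+;   (a & ~sext(cmp)) | (-a & sext(cmp))
+; and check that it is recognized as a sign test plus select,
+; roughly: b < 0 ? -a : a.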
+define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_select(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %cmp = icmp slt <4 x i32> %b, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %t0 = icmp slt <4 x i32> %sext, zeroinitializer
+ %sext3 = sext <4 x i1> %t0 to <4 x i32>
+ %t1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %t2 = and <4 x i32> %a, %t1
+ %t3 = and <4 x i32> %sext3, %sub
+ %cond = or <4 x i32> %t2, %t3
+ ret <4 x i32> %cond
+}
+
+define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_select_alternate_sign_bit_test(
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %sub = sub nsw <4 x i32> zeroinitializer, %a
+ %t0 = icmp slt <4 x i32> %sext, zeroinitializer
+ %sext3 = sext <4 x i1> %t0 to <4 x i32>
+ %t1 = xor <4 x i32> %sext3, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %t2 = and <4 x i32> %a, %t1
+ %t3 = and <4 x i32> %sext3, %sub
+ %cond = or <4 x i32> %t2, %t3
+ ret <4 x i32> %cond
+}
+
+define <2 x i32> @is_negative_undef_elt(<2 x i32> %a) {
+; CHECK-LABEL: @is_negative_undef_elt(
+; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: ret <2 x i32> [[A_LOBIT]]
+;
+ %cmp = icmp slt <2 x i32> %a, <i32 0, i32 undef>
+ %sext = sext <2 x i1> %cmp to <2 x i32>
+ ret <2 x i32> %sext
+
+}
+
+define <2 x i32> @is_positive_undef_elt(<2 x i32> %a) {
+; CHECK-LABEL: @is_positive_undef_elt(
+; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[A_LOBIT_NOT:%.*]] = xor <2 x i32> [[A_LOBIT]], <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[A_LOBIT_NOT]]
+;
+ %cmp = icmp sgt <2 x i32> %a, <i32 undef, i32 -1>
+ %sext = sext <2 x i1> %cmp to <2 x i32>
+ ret <2 x i32> %sext
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
new file mode 100644
index 00000000000..354256aab54
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -0,0 +1,1142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x float> @test1(<4 x float> %v1) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
+;
+ %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %v2
+}
+
+define <4 x float> @test2(<4 x float> %v1) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
+;
+ %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %v2
+}
+
+define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret float [[F:%.*]]
+;
+ %C = insertelement <4 x float> %A, float %f, i32 0
+ %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
+ %E = extractelement <4 x float> %D, i32 1
+ ret float %E
+}
+
+define i32 @test4(<4 x i32> %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
+ %r = extractelement <4 x i32> %t, i32 0
+ ret i32 %r
+}
+
+define i32 @test5(<4 x i32> %X) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
+ %r = extractelement <4 x i32> %t, i32 0
+ ret i32 %r
+}
+
+define float @test6(<4 x float> %X) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
+; CHECK-NEXT: ret float [[R]]
+;
+ %X1 = bitcast <4 x float> %X to <4 x i32>
+ %t = shufflevector <4 x i32> %X1, <4 x i32> undef, <4 x i32> zeroinitializer
+ %t2 = bitcast <4 x i32> %t to <4 x float>
+ %r = extractelement <4 x float> %t2, i32 0
+ ret float %r
+}
+
+define <4 x float> @test7(<4 x float> %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret <4 x float> [[X:%.*]]
+;
+ %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
+ ret <4 x float> %r
+}
+
+; This should turn into a single shuffle.
+define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 undef, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x float> [[T134]]
+;
+ %t4 = extractelement <4 x float> %x, i32 1
+ %t2 = extractelement <4 x float> %x, i32 3
+ %t1 = extractelement <4 x float> %y, i32 0
+ %t128 = insertelement <4 x float> undef, float %t4, i32 0
+ %t130 = insertelement <4 x float> %t128, float undef, i32 1
+ %t132 = insertelement <4 x float> %t130, float %t2, i32 2
+ %t134 = insertelement <4 x float> %t132, float %t1, i32 3
+ ret <4 x float> %t134
+}
+
+; Test fold of two shuffles where the first shuffle's vector inputs are a
+; different length than the second's.
+define <4 x i8> @test9(<16 x i8> %t6) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[T9:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
+; CHECK-NEXT: ret <4 x i8> [[T9]]
+;
+ %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 >
+ %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >
+ ret <4 x i8> %t9
+}
+
+; Same as test9, but make sure that "undef" mask values are not confused with
+; mask values of 2*N, where N is the mask length. These shuffles should not
+; be folded (because [8,9,4,8] may not be a mask supported by the target).
+
+define <4 x i8> @test9a(<16 x i8> %t6) {
+; CHECK-LABEL: @test9a(
+; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
+; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: ret <4 x i8> [[T9]]
+;
+ %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 >
+ %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 >
+ ret <4 x i8> %t9
+}
+
+; Test fold of two shuffles where the first shuffle's vector inputs are a
+; different length than the second's.
+define <4 x i8> @test9b(<4 x i8> %t6, <4 x i8> %t7) {
+; CHECK-LABEL: @test9b(
+; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[T7:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x i8> [[T9]]
+;
+ %t1 = shufflevector <4 x i8> %t6, <4 x i8> %t7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>
+ %t9 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x i8> %t9
+}
+
+; Redundant vector splats should be removed. Radar 8597790.
+define <4 x i32> @test10(<4 x i32> %t5) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x i32> [[T5:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[T7]]
+;
+ %t6 = shufflevector <4 x i32> %t5, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %t7 = shufflevector <4 x i32> %t6, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %t7
+}
+
+; Test fold of two shuffles where the two shufflevectors' op1 is the same.
+
+define <8 x i8> @test11(<16 x i8> %t6) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i8> [[T3]]
+;
+ %t1 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %t2 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %t3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are the same as the second's.
+
+define <8 x i8> @test12(<8 x i8> %t6, <8 x i8> %t2) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T6:%.*]], <8 x i8> [[T2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
+; CHECK-NEXT: ret <8 x i8> [[T3]]
+;
+ %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
+ %t3 = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
+ ret <8 x i8> %t3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are the same as the second's.
+
+define <8 x i8> @test12a(<8 x i8> %t6, <8 x i8> %t2) {
+; CHECK-LABEL: @test12a(
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T2:%.*]], <8 x i8> [[T6:%.*]], <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: ret <8 x i8> [[T3]]
+;
+ %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
+ %t3 = shufflevector <8 x i8> %t2, <8 x i8> %t1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i8> %t3
+}
+
+; The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle.
+
+define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @extract_subvector_of_shuffle(
+; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x i8> [[EXTRACT_SUBV]]
+;
+ %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> <i32 0, i32 2, i32 0>
+ %extract_subv = shufflevector <3 x i8> %shuf, <3 x i8> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8> %extract_subv
+}
+
+; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle.
+; The type of the inputs does not have to match the output type.
+
+define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types(
+; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
+; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
+;
+ %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
+ %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x i8> %extract_subv
+}
+
+; Extra uses are not ok - we only do the transform when we can eliminate an instruction.
+
+declare void @use_v5i8(<5 x i8>)
+
+define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
+; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]])
+; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
+;
+ %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
+ call void @use_v5i8(<5 x i8> %shuf)
+ %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x i8> %extract_subv
+}
+
+define <2 x i8> @test13a(i8 %x1, i8 %x2) {
+; CHECK-LABEL: @test13a(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i8> [[TMP2]], <i8 7, i8 5>
+; CHECK-NEXT: ret <2 x i8> [[TMP3]]
+;
+ %A = insertelement <2 x i8> undef, i8 %x1, i32 0
+ %B = insertelement <2 x i8> %A, i8 %x2, i32 1
+ %C = add <2 x i8> %B, <i8 5, i8 7>
+ %D = shufflevector <2 x i8> %C, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i8> %D
+}
+
+; Increasing length of vector ops is not a good canonicalization.
+
+define <3 x i32> @add_wider(i32 %y, i32 %z) {
+; CHECK-LABEL: @add_wider(
+; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
+; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x i32> [[EXT]]
+;
+ %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
+ %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
+ %a = add <2 x i32> %i1, <i32 255, i32 255>
+ %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+ ret <3 x i32> %ext
+}
+
+; Increasing length of vector ops must be safe from illegal undef propagation.
+
+define <3 x i32> @div_wider(i32 %y, i32 %z) {
+; CHECK-LABEL: @div_wider(
+; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
+; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: ret <3 x i32> [[EXT]]
+;
+ %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
+ %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
+ %a = sdiv <2 x i32> %i1, <i32 255, i32 255>
+ %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+ ret <3 x i32> %ext
+}
+
+; Increasing length of insertelements (no math ops) is a good canonicalization.
+
+define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
+; CHECK-NEXT: ret <3 x i8> [[TMP2]]
+;
+ %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0
+ %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1
+ %widen = shufflevector <2 x i8> %ins1, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+ ret <3 x i8> %widen
+}
+
+define <2 x i8> @test13b(i8 %x) {
+; CHECK-LABEL: @test13b(
+; CHECK-NEXT: [[B:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
+; CHECK-NEXT: ret <2 x i8> [[B]]
+;
+ %A = insertelement <2 x i8> undef, i8 %x, i32 0
+ %B = shufflevector <2 x i8> %A, <2 x i8> undef, <2 x i32> <i32 undef, i32 0>
+ ret <2 x i8> %B
+}
+
+define <2 x i8> @test13c(i8 %x1, i8 %x2) {
+; CHECK-LABEL: @test13c(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 1
+; CHECK-NEXT: ret <2 x i8> [[TMP2]]
+;
+ %A = insertelement <4 x i8> undef, i8 %x1, i32 0
+ %B = insertelement <4 x i8> %A, i8 %x2, i32 2
+ %C = shufflevector <4 x i8> %B, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
+ ret <2 x i8> %C
+}
+
+define void @test14(i16 %conv10) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: store <4 x i16> <i16 undef, i16 undef, i16 undef, i16 23>, <4 x i16>* undef, align 8
+; CHECK-NEXT: ret void
+;
+ %t = alloca <4 x i16>, align 8
+ %vecinit6 = insertelement <4 x i16> undef, i16 23, i32 3
+ store <4 x i16> %vecinit6, <4 x i16>* undef
+ %t1 = load <4 x i16>, <4 x i16>* undef
+ %vecinit11 = insertelement <4 x i16> undef, i16 %conv10, i32 3
+ %div = udiv <4 x i16> %t1, %vecinit11
+ store <4 x i16> %div, <4 x i16>* %t
+ %t4 = load <4 x i16>, <4 x i16>* %t
+ %t5 = shufflevector <4 x i16> %t4, <4 x i16> undef, <2 x i32> <i32 2, i32 0>
+ %cmp = icmp ule <2 x i16> %t5, undef
+ %sext = sext <2 x i1> %cmp to <2 x i16>
+ ret void
+}
+
+; Check that sequences of insert/extract element are
+; collapsed into valid shuffle instruction with correct shuffle indexes.
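+; For example, in @test15a the result lanes are <RHS[0], LHS[0], RHS[2], RHS[2]>,
+; which corresponds to the shuffle mask <4, 0, 6, 6> on (LHS, RHS).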
+
+define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 0, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> [[T4]]
+;
+ %t1 = extractelement <4 x float> %LHS, i32 0
+ %t2 = insertelement <4 x float> %RHS, float %t1, i32 1
+ %t3 = extractelement <4 x float> %RHS, i32 2
+ %t4 = insertelement <4 x float> %t2, float %t3, i32 3
+ ret <4 x float> %t4
+}
+
+define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
+; CHECK-LABEL: @test15b(
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 3, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x float> [[T5]]
+;
+ %t0 = extractelement <4 x float> %LHS, i32 3
+ %t1 = insertelement <4 x float> %RHS, float %t0, i32 0
+ %t2 = extractelement <4 x float> %t1, i32 0
+ %t3 = insertelement <4 x float> %RHS, float %t2, i32 1
+ %t4 = extractelement <4 x float> %RHS, i32 2
+ %t5 = insertelement <4 x float> %t3, float %t4, i32 3
+ ret <4 x float> %t5
+}
+
+define <1 x i32> @test16a(i32 %ele) {
+; CHECK-LABEL: @test16a(
+; CHECK-NEXT: ret <1 x i32> <i32 2>
+;
+ %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
+ %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
+ %t2 = shufflevector <2 x i32> %t1, <2 x i32> undef, <1 x i32> <i32 0>
+ ret <1 x i32> %t2
+}
+
+define <4 x i8> @test16b(i8 %ele) {
+; CHECK-LABEL: @test16b(
+; CHECK-NEXT: ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
+;
+ %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
+ %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %t2 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i8> %t2
+}
+
+; If composition of two shuffles is identity, shuffles can be removed.
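+; Here mask1 = <1,2,3,0> and mask2 = <3,0,1,2>; their composition is
+; <0,1,2,3>, the identity, so both shuffles can be removed.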
+define <4 x i32> @shuffle_17ident(<4 x i32> %v) {
+; CHECK-LABEL: @shuffle_17ident(
+; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
+;
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+ ret <4 x i32> %shuffle2
+}
+
+; The swizzle can be moved after the operation.
+define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_17and(
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = and <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+declare void @use(<2 x float>)
+
+; One extra use is ok to transform.
+
+define <2 x float> @shuffle_fadd_multiuse(<2 x float> %v1, <2 x float> %v2) {
+; CHECK-LABEL: @shuffle_fadd_multiuse(
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[V1]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: call void @use(<2 x float> [[T1]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %r = fadd <2 x float> %t1, %t2
+ call void @use(<2 x float> %t1)
+ ret <2 x float> %r
+}
+
+define <2 x float> @shuffle_fdiv_multiuse(<2 x float> %v1, <2 x float> %v2) {
+; CHECK-LABEL: @shuffle_fdiv_multiuse(
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[V1:%.*]], [[V2]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: call void @use(<2 x float> [[T2]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %r = fdiv <2 x float> %t1, %t2
+ call void @use(<2 x float> %t2)
+ ret <2 x float> %r
+}
+
+; But 2 extra uses would require an extra instruction.
+
+define <2 x float> @shuffle_fsub_multiuse(<2 x float> %v1, <2 x float> %v2) {
+; CHECK-LABEL: @shuffle_fsub_multiuse(
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> [[T1]], [[T2]]
+; CHECK-NEXT: call void @use(<2 x float> [[T1]])
+; CHECK-NEXT: call void @use(<2 x float> [[T2]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %r = fsub <2 x float> %t1, %t2
+ call void @use(<2 x float> %t1)
+ call void @use(<2 x float> %t2)
+ ret <2 x float> %r
+}
+
+define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_17add(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_17addnsw(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add nsw <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_17addnuw(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw <4 x i32> [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add nuw <4 x i32> %t1, %t2
+ ret <4 x i32> %r
+}
+
+define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) {
+; CHECK-LABEL: @shuffle_17fsub_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <4 x float> [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = fsub fast <4 x float> %t1, %t2
+ ret <4 x float> %r
+}
+
+define <4 x i32> @add_const(<4 x i32> %v) {
+; CHECK-LABEL: @add_const(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V:%.*]], <i32 44, i32 41, i32 42, i32 43>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+ %r = add <4 x i32> %t1, <i32 41, i32 42, i32 43, i32 44>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @sub_const(<4 x i32> %v) {
+; CHECK-LABEL: @sub_const(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 44, i32 43, i32 42, i32 41>, [[V:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %r = sub <4 x i32> <i32 41, i32 42, i32 43, i32 44>, %t1
+ ret <4 x i32> %r
+}
+
+; Math before shuffle requires an extra shuffle.
+
+define <2 x float> @fadd_const_multiuse(<2 x float> %v) {
+; CHECK-LABEL: @fadd_const_multiuse(
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
+; CHECK-NEXT: call void @use(<2 x float> [[T1]])
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ %r = fadd <2 x float> %t1, <float 41.0, float 42.0>
+ call void @use(<2 x float> %t1)
+ ret <2 x float> %r
+}
+
+; Math before splat allows replacing constant elements with undef lanes.
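+; Only lane 1 of the pre-shuffle multiply is demanded (the splat broadcasts
+; lane 1), so the remaining constant lanes can be relaxed to undef.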
+
+define <4 x i32> @mul_const_splat(<4 x i32> %v) {
+; CHECK-LABEL: @mul_const_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 42, i32 undef, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %r = mul <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %t1
+ ret <4 x i32> %r
+}
+
+; Take 2 elements of a vector and shift each of those by a different amount
+
+define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
+; CHECK-LABEL: @lshr_const_half_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+ %r = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, %t1
+ ret <4 x i32> %r
+}
+
+; We can't change this because there's no pre-shuffle version of the fmul constant.
+
+define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) {
+; CHECK-LABEL: @fmul_const_invalid_constant(
+; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
+ %r = fmul <2 x float> %t1, <float 41.0, float 42.0>
+ ret <2 x float> %r
+}
+
+; Reduce the width of the binop by moving it ahead of a shuffle.
+
+define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) {
+; CHECK-LABEL: @widening_shuffle_add_1(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 42, i8 43>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
+ ret <4 x i8> %r
+}
+
+; Reduce the width of the binop by moving it ahead of a shuffle.
+
+define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) {
+; CHECK-LABEL: @widening_shuffle_add_2(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 43, i8 42>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+ %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
+ ret <4 x i8> %r
+}
+
+; Negative test - widening shuffles have the same mask/constant constraint as non-size-changing shuffles.
+
+define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) {
+; CHECK-LABEL: @widening_shuffle_add_invalid_constant(
+; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
+ %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
+ ret <4 x i8> %r
+}
+
+; Negative test - widening shuffles have an additional constraint: they must not extend with anything but undefs.
+
+define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) {
+; CHECK-LABEL: @widening_shuffle_add_invalid_mask(
+; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
+; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
+ %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
+ ret <4 x i8> %r
+}
+
+; A binop that produces undef in the high lanes can be moved before the shuffle.
+; This is ok because 'shl C, undef --> undef'.
+
+define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) {
+; CHECK-LABEL: @widening_shuffle_shl_constant_op0(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> <i16 42, i16 -42>, [[V:%.*]]
+; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: ret <4 x i16> [[BO]]
+;
+ %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %bo = shl <4 x i16> <i16 42, i16 -42, i16 -1, i16 -1>, %shuf
+ ret <4 x i16> %bo
+}
+
+; A binop that produces undef in the high lanes can be moved before the shuffle.
+; This is ok because 'shl undef, 0 --> undef'.
+
+define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) {
+; CHECK-LABEL: @widening_shuffle_shl_constant_op1(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], <i16 2, i16 4>
+; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: ret <4 x i16> [[BO]]
+;
+ %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 0, i16 0>
+ ret <4 x i16> %bo
+}
+
+; A binop that does not produce undef in the high lanes cannot be moved before the shuffle.
+; This is not ok because 'shl undef, 1 (or 2) --> 0', but moving the shuffle would produce undef instead.
+
+define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) {
+; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[BO:%.*]] = shl <4 x i16> [[SHUF]], <i16 2, i16 4, i16 1, i16 2>
+; CHECK-NEXT: ret <4 x i16> [[BO]]
+;
+ %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 1, i16 2>
+ ret <4 x i16> %bo
+}
+
+; A binop that does not produce undef in the high lanes cannot be moved before the shuffle.
+; This is not ok because 'or -1, undef --> -1', but moving the shuffle would produce undef instead.
+
+define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
+; CHECK-LABEL: @widening_shuffle_or(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[BO:%.*]] = or <4 x i16> [[SHUF]], <i16 42, i16 -42, i16 -1, i16 -1>
+; CHECK-NEXT: ret <4 x i16> [[BO]]
+;
+ %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %bo = or <4 x i16> %shuf, <i16 42, i16 -42, i16 -1, i16 -1>
+ ret <4 x i16> %bo
+}
+
+define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
+; CHECK-LABEL: @shuffle_17add2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %t2 = add <4 x i32> %t1, %t1
+ %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %r
+}
+
+define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) {
+; CHECK-LABEL: @shuffle_17mulsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], [[V]]
+; CHECK-NEXT: [[M1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i32> [[M1]]
+;
+ %s1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %m1 = mul <4 x i32> %s1, %s1
+ %s2 = shufflevector <4 x i32> %m1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %s2
+}
+
+; Do not reorder the shuffle and binop if the shuffles' LHS operands have different sizes.
+define <2 x i32> @pr19717(<4 x i32> %in0, <2 x i32> %in1) {
+; CHECK-LABEL: @pr19717(
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[IN0:%.*]], <4 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[IN1:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[SHUFFLE]], [[SHUFFLE4]]
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %shuffle = shufflevector <4 x i32> %in0, <4 x i32> %in0, <2 x i32> zeroinitializer
+ %shuffle4 = shufflevector <2 x i32> %in1, <2 x i32> %in1, <2 x i32> zeroinitializer
+ %mul = mul <2 x i32> %shuffle, %shuffle4
+ ret <2 x i32> %mul
+}
+
+define <4 x i16> @pr19717a(<8 x i16> %in0, <8 x i16> %in1) {
+; CHECK-LABEL: @pr19717a(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> [[IN0:%.*]], [[IN1:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+ %shuffle = shufflevector <8 x i16> %in0, <8 x i16> %in0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %shuffle1 = shufflevector <8 x i16> %in1, <8 x i16> %in1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %mul = mul <4 x i16> %shuffle, %shuffle1
+ ret <4 x i16> %mul
+}
+
+define <8 x i8> @pr19730(<16 x i8> %in0) {
+; CHECK-LABEL: @pr19730(
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[IN0:%.*]], <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x i8> [[SHUFFLE]], <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i8> [[SHUFFLE1]]
+;
+ %shuffle = shufflevector <16 x i8> %in0, <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %shuffle1 = shufflevector <8 x i8> %shuffle, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %shuffle1
+}
+
+define i32 @pr19737(<4 x i32> %in0) {
+; CHECK-LABEL: @pr19737(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and.i = and <4 x i32> %in0, %neg.i
+ %rv = extractelement <4 x i32> %and.i, i32 0
+ ret i32 %rv
+}
+
+; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
+; for an srem operation. This is not a valid optimization because it may cause a trap
+; on div-by-zero.
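+; For example, hoisting the srem above the splats would divide by lanes of
+; %p2 other than lane 0, and any of those lanes could be zero.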
+
+define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) {
+; CHECK-LABEL: @pr20059(
+; CHECK-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[P1:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[P2:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[RETVAL:%.*]] = srem <4 x i32> [[SPLAT1]], [[SPLAT2]]
+; CHECK-NEXT: ret <4 x i32> [[RETVAL]]
+;
+ %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+ %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+ %retval = srem <4 x i32> %splat1, %splat2
+ ret <4 x i32> %retval
+}
+
+define <4 x i32> @pr20114(<4 x i32> %__mask) {
+; CHECK-LABEL: @pr20114(
+; CHECK-NEXT: [[MASK01_I:%.*]] = shufflevector <4 x i32> [[__MASK:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[MASKED_NEW_I_I_I:%.*]] = and <4 x i32> [[MASK01_I]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>)
+; CHECK-NEXT: ret <4 x i32> [[MASKED_NEW_I_I_I]]
+;
+ %mask01.i = shufflevector <4 x i32> %__mask, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
+ ret <4 x i32> %masked_new.i.i.i
+}
+
+define <2 x i32*> @pr23113(<4 x i32*> %A) {
+; CHECK-LABEL: @pr23113(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32*> [[A:%.*]], <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i32*> [[TMP1]]
+;
+ %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32*> %1
+}
+
+; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
+
+define <2 x i32> @PR37648(<2 x i32> %x) {
+; CHECK-LABEL: @PR37648(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = urem <2 x i32> %splat, <i32 1, i32 1>
+ ret <2 x i32> %r
+}
+
+; Test shuffle followed by binop with splat constant for all 18 binop opcodes.
+; Test with constant as operand 0 and operand 1 for non-commutative opcodes.
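+; In the cases that do transform, the binop is applied before the splat and
+; the unused constant lane is relaxed: to undef where that is harmless, to 0
+; for shift amounts, and to 1 for integer div/rem divisors. Integer div/rem
+; with the splat as the divisor is left alone, since hoisting it could divide
+; by a lane the original code never used.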
+
+define <2 x i32> @add_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @add_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = add <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @sub_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @sub_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 42, i32 undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = sub <2 x i32> <i32 42, i32 42>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @sub_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @sub_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = sub <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @mul_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @mul_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], <i32 42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = mul <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @shl_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @shl_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = shl <2 x i32> <i32 5, i32 5>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @shl_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @shl_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 0>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = shl <2 x i32> %splat, <i32 5, i32 5>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = ashr <2 x i32> <i32 5, i32 5>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @ashr_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @ashr_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 0>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = ashr <2 x i32> %splat, <i32 5, i32 5>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @lshr_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @lshr_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = lshr <2 x i32> <i32 5, i32 5>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @lshr_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 0>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = lshr <2 x i32> %splat, <i32 5, i32 5>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @urem_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @urem_splat_constant0(
+; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = urem <2 x i32> <i32 42, i32 42>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @urem_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @urem_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], <i32 42, i32 1>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = urem <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @srem_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @srem_splat_constant0(
+; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = srem <2 x i32> <i32 42, i32 42>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @srem_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @srem_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], <i32 42, i32 1>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = srem <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @udiv_splat_constant0(
+; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = udiv <2 x i32> <i32 42, i32 42>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @udiv_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = udiv <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) {
+; CHECK-LABEL: @sdiv_splat_constant0(
+; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = sdiv <2 x i32> <i32 42, i32 42>, %splat
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @sdiv_splat_constant1(<2 x i32> %x) {
+; CHECK-LABEL: @sdiv_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = sdiv <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @and_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @and_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = and <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @or_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @or_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = or <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @xor_splat_constant(<2 x i32> %x) {
+; CHECK-LABEL: @xor_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 42, i32 undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
+ %r = xor <2 x i32> %splat, <i32 42, i32 42>
+ ret <2 x i32> %r
+}
+
+define <2 x float> @fadd_splat_constant(<2 x float> %x) {
+; CHECK-LABEL: @fadd_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fadd <2 x float> %splat, <float 42.0, float 42.0>
+ ret <2 x float> %r
+}
+
+define <2 x float> @fsub_splat_constant0(<2 x float> %x) {
+; CHECK-LABEL: @fsub_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fsub <2 x float> <float 42.0, float 42.0>, %splat
+ ret <2 x float> %r
+}
+
+define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
+; CHECK-LABEL: @fsub_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float -4.200000e+01, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fsub <2 x float> %splat, <float 42.0, float 42.0>
+ ret <2 x float> %r
+}
+
+define <2 x float> @fneg(<2 x float> %x) {
+; CHECK-LABEL: @fneg(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> <float -0.000000e+00, float undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fsub <2 x float> <float -0.0, float -0.0>, %splat
+ ret <2 x float> %r
+}
+
+define <2 x float> @fmul_splat_constant(<2 x float> %x) {
+; CHECK-LABEL: @fmul_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fmul <2 x float> %splat, <float 42.0, float 42.0>
+ ret <2 x float> %r
+}
+
+define <2 x float> @fdiv_splat_constant0(<2 x float> %x) {
+; CHECK-LABEL: @fdiv_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fdiv <2 x float> <float 42.0, float 42.0>, %splat
+ ret <2 x float> %r
+}
+
+define <2 x float> @fdiv_splat_constant1(<2 x float> %x) {
+; CHECK-LABEL: @fdiv_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = fdiv <2 x float> %splat, <float 42.0, float 42.0>
+ ret <2 x float> %r
+}
+
+define <2 x float> @frem_splat_constant0(<2 x float> %x) {
+; CHECK-LABEL: @frem_splat_constant0(
+; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = frem <2 x float> <float 42.0, float 42.0>, %splat
+ ret <2 x float> %r
+}
+
+define <2 x float> @frem_splat_constant1(<2 x float> %x) {
+; CHECK-LABEL: @frem_splat_constant1(
+; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %splat = shufflevector <2 x float> %x, <2 x float> undef, <2 x i32> zeroinitializer
+ %r = frem <2 x float> %splat, <float 42.0, float 42.0>
+ ret <2 x float> %r
+}
+
+; Equivalent shuffle masks, but only one is a narrowing op.
+
+define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
+; CHECK-LABEL: @PR40734(
+; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %widen = shufflevector <1 x i1> zeroinitializer, <1 x i1> %x, <2 x i32> <i32 0, i32 1>
+ %narrow = shufflevector <4 x i1> %y, <4 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %r = and <2 x i1> %widen, %narrow
+ ret <2 x i1> %r
+}
+
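The *_splat_constant tests above all exercise one canonicalization: a binop whose operands are a splatted value and a splat constant is rewritten so the binop is applied first to the original vector and the single splat shuffle happens last. A sketch of what the expected output corresponds to as standalone IR (the function name is illustrative, and this assumes -instcombine behaves as the checks above describe):

define <2 x i32> @and_splat_constant_expected(<2 x i32> %x) {
  ; only lane 0 of the 'and' result is read by the splat shuffle below,
  ; so the constant only needs to be defined in lane 0 (hence the undef lane)
  %t = and <2 x i32> %x, <i32 42, i32 undef>
  %r = shufflevector <2 x i32> %t, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %r
}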
diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll
new file mode 100644
index 00000000000..d2acefc0fbf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-casts.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Can't get smaller than this.
+
+define <2 x i1> @trunc(<2 x i64> %a) {
+; CHECK-LABEL: @trunc(
+; CHECK-NEXT: [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[T]]
+;
+ %t = trunc <2 x i64> %a to <2 x i1>
+ ret <2 x i1> %t
+}
+
+; This is trunc.
+
+define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
+; CHECK-LABEL: @and_cmp_is_trunc(
+; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t = and <2 x i64> %a, <i64 1, i64 1>
+ %r = icmp ne <2 x i64> %t, zeroinitializer
+ ret <2 x i1> %r
+}
+
+; This is trunc.
+
+define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
+; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
+; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t = and <2 x i64> %a, <i64 undef, i64 1>
+ %r = icmp ne <2 x i64> %t, zeroinitializer
+ ret <2 x i1> %r
+}
+
+; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.
+
+define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
+; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(
+; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0>
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %t = and <2 x i64> %a, <i64 undef, i64 1>
+ %r = icmp ne <2 x i64> %t, <i64 undef, i64 0>
+ ret <2 x i1> %r
+}
+
+; The ashr turns into an lshr.
+define <2 x i64> @test2(<2 x i64> %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %b = and <2 x i64> %a, <i64 65535, i64 65535>
+ %t = ashr <2 x i64> %b, <i64 1, i64 1>
+ ret <2 x i64> %t
+}
+
+define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[CONV]]
+;
+ %cmp = fcmp ord <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ord <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = and <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+}
+
+define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[OR]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[CONV]]
+;
+ %cmp = fcmp uno <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp uno <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %or = or <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %or to <2 x i64>
+ ret <2 x i64> %conv
+}
+
+; rdar://7434900
+define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[AND1:%.*]] = and <4 x i1> [[CMP]], [[CMP4]]
+; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
+; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[CONV]]
+;
+ %cmp = fcmp ult <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = and <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+}
+
+define <2 x i64> @test6(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[AND1:%.*]] = or <4 x i1> [[CMP]], [[CMP4]]
+; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
+; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[CONV]]
+;
+ %cmp = fcmp ult <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = or <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+}
+
+define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
+; CHECK-NEXT: [[AND1:%.*]] = xor <4 x i1> [[CMP]], [[CMP4]]
+; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
+; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[CONV]]
+;
+ %cmp = fcmp ult <4 x float> %a, zeroinitializer
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
+ %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
+ %and = xor <4 x i32> %sext, %sext5
+ %conv = bitcast <4 x i32> %and to <2 x i64>
+ ret <2 x i64> %conv
+}
+
+define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) {
+; CHECK-LABEL: @convert(
+; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
+; CHECK-NEXT: store <2 x i32> [[ADD]], <2 x i32>* [[DST_ADDR:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %val = trunc <2 x i64> %src to <2 x i32>
+ %add = add <2 x i32> %val, <i32 1, i32 1>
+ store <2 x i32> %add, <2 x i32>* %dst.addr
+ ret void
+}
+
+define <2 x i65> @foo(<2 x i64> %t) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65>
+; CHECK-NEXT: ret <2 x i65> [[B]]
+;
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i65>
+ ret <2 x i65> %b
+}
+
+define <2 x i64> @bar(<2 x i65> %t) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = zext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+
+define <2 x i64> @bars(<2 x i65> %t) {
+; CHECK-LABEL: @bars(
+; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32>
+; CHECK-NEXT: [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = trunc <2 x i65> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+
+define <2 x i64> @quxs(<2 x i64> %t) {
+; CHECK-LABEL: @quxs(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
+; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = trunc <2 x i64> %t to <2 x i32>
+ %b = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %b
+}
+
+define <2 x i64> @quxt(<2 x i64> %t) {
+; CHECK-LABEL: @quxt(
+; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
+; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
+; CHECK-NEXT: ret <2 x i64> [[B]]
+;
+ %a = shl <2 x i64> %t, <i64 32, i64 32>
+ %b = ashr <2 x i64> %a, <i64 32, i64 32>
+ ret <2 x i64> %b
+}
+
+define <2 x double> @fa(<2 x double> %t) {
+; CHECK-LABEL: @fa(
+; CHECK-NEXT: [[A:%.*]] = fptrunc <2 x double> [[T:%.*]] to <2 x float>
+; CHECK-NEXT: [[B:%.*]] = fpext <2 x float> [[A]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[B]]
+;
+ %a = fptrunc <2 x double> %t to <2 x float>
+ %b = fpext <2 x float> %a to <2 x double>
+ ret <2 x double> %b
+}
+
+define <2 x double> @fb(<2 x double> %t) {
+; CHECK-LABEL: @fb(
+; CHECK-NEXT: [[A:%.*]] = fptoui <2 x double> [[T:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B:%.*]] = uitofp <2 x i64> [[A]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[B]]
+;
+ %a = fptoui <2 x double> %t to <2 x i64>
+ %b = uitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
+
+define <2 x double> @fc(<2 x double> %t) {
+; CHECK-LABEL: @fc(
+; CHECK-NEXT: [[A:%.*]] = fptosi <2 x double> [[T:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B:%.*]] = sitofp <2 x i64> [[A]] to <2 x double>
+; CHECK-NEXT: ret <2 x double> [[B]]
+;
+ %a = fptosi <2 x double> %t to <2 x i64>
+ %b = sitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %b
+}
+
+; PR9228
+define <4 x float> @f(i32 %a) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT: ret <4 x float> undef
+;
+ %dim = insertelement <4 x i32> undef, i32 %a, i32 0
+ %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
+ %dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
+ %dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3
+
+ %offset_ptr = getelementptr <4 x float>, <4 x float>* null, i32 1
+ %offset_int = ptrtoint <4 x float>* %offset_ptr to i64
+ %sizeof32 = trunc i64 %offset_int to i32
+
+ %smearinsert33 = insertelement <4 x i32> undef, i32 %sizeof32, i32 0
+ %smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
+ %smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
+ %smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3
+
+ %delta_scale = mul <4 x i32> %dim32, %smearinsert36
+ %offset_delta = add <4 x i32> zeroinitializer, %delta_scale
+
+ %offset_varying_delta = add <4 x i32> %offset_delta, undef
+
+ ret <4 x float> undef
+}
+
+define <8 x i32> @pr24458(<8 x float> %n) {
+; CHECK-LABEL: @pr24458(
+; CHECK-NEXT: ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+;
+ %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
+ %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
+ %notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
+ %equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
+ %wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i
+ ret <8 x i32> %wrong
+}
+
+; Hoist a trunc to a scalar if we're inserting into an undef vector.
+; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
+
+define <3 x i16> @trunc_inselt_undef(i32 %x) {
+; CHECK-LABEL: @trunc_inselt_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
+; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
+; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
+;
+ %vec = insertelement <3 x i32> undef, i32 %x, i32 1
+ %trunc = trunc <3 x i32> %vec to <3 x i16>
+ ret <3 x i16> %trunc
+}
+
+; Hoist a trunc to a scalar if we're inserting into an undef vector.
+; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
+
+define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
+; CHECK-LABEL: @fptrunc_inselt_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[X:%.*]] to float
+; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 [[INDEX:%.*]]
+; CHECK-NEXT: ret <2 x float> [[TRUNC]]
+;
+ %vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
+ %trunc = fptrunc <2 x double> %vec to <2 x float>
+ ret <2 x float> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar int into a constant vector and truncate:
+; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index
+
+define <3 x i16> @trunc_inselt1(i32 %x) {
+; CHECK-LABEL: @trunc_inselt1(
+; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 undef, i32 65536>, i32 [[X:%.*]], i32 1
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
+; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
+;
+ %vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
+ %trunc = trunc <3 x i32> %vec to <3 x i16>
+ ret <3 x i16> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar FP into a constant vector and FP truncate:
+; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index
+
+define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
+; CHECK-LABEL: @fptrunc_inselt1(
+; CHECK-NEXT: [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double [[X:%.*]], i32 [[INDEX:%.*]]
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[TRUNC]]
+;
+ %vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
+ %trunc = fptrunc <2 x double> %vec to <2 x float>
+ ret <2 x float> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar int constant into a vector and truncate:
+; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index
+
+define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
+; CHECK-LABEL: @trunc_inselt2(
+; CHECK-NEXT: [[VEC:%.*]] = insertelement <8 x i32> [[X:%.*]], i32 1048576, i32 [[INDEX:%.*]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TRUNC]]
+;
+ %vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
+ %trunc = trunc <8 x i32> %vec to <8 x i16>
+ ret <8 x i16> %trunc
+}
+
+; TODO: Strengthen the backend, so we can have this canonicalization.
+; Insert a scalar FP constant into a vector and FP truncate:
+; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index
+
+define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
+; CHECK-LABEL: @fptrunc_inselt2(
+; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x double> [[X:%.*]], double 4.000000e+00, i32 2
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
+; CHECK-NEXT: ret <3 x float> [[TRUNC]]
+;
+ %vec = insertelement <3 x double> %x, double 4.0, i32 2
+ %trunc = fptrunc <3 x double> %vec to <3 x float>
+ ret <3 x float> %trunc
+}
+
+; Converting to a wide type might reduce instruction count,
+; but we cannot do that unless the backend can recover from
+; the creation of a potentially illegal op (like a 64-bit vmul).
+; PR40032 - https://bugs.llvm.org/show_bug.cgi?id=40032
+
+define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @sext_less_casting_with_wideop(
+; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %xnarrow = trunc <2 x i64> %x to <2 x i32>
+ %ynarrow = trunc <2 x i64> %y to <2 x i32>
+ %mul = mul <2 x i32> %xnarrow, %ynarrow
+ %r = sext <2 x i32> %mul to <2 x i64>
+ ret <2 x i64> %r
+}
+
+define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @zext_less_casting_with_wideop(
+; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
+; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
+; CHECK-NEXT: [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %xnarrow = trunc <2 x i64> %x to <2 x i32>
+ %ynarrow = trunc <2 x i64> %y to <2 x i32>
+ %mul = mul <2 x i32> %xnarrow, %ynarrow
+ %r = zext <2 x i32> %mul to <2 x i64>
+ ret <2 x i64> %r
+}
+
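A scalar sketch of the sext-of-trunc canonicalization checked in @quxs above, assuming -instcombine treats the scalar case the same way (the function name is illustrative): the expected result is a shl by 32 followed by an 'ashr exact' by 32, and the exact flag is justified because the shl has just zeroed the low 32 bits, so the ashr discards only zero bits.

define i64 @sext_trunc_scalar_sketch(i64 %t) {
  ; expected to become: %s = shl i64 %t, 32 ; %b = ashr exact i64 %s, 32
  %a = trunc i64 %t to i32
  %b = sext i32 %a to i64
  ret i64 %b
}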
diff --git a/llvm/test/Transforms/InstCombine/vector-concat-binop.ll b/llvm/test/Transforms/InstCombine/vector-concat-binop.ll
new file mode 100644
index 00000000000..c2b0f0c2145
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-concat-binop.ll
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+define <4 x i8> @add(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @add(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = add <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Flags should propagate.
+
+define <4 x i8> @sub(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @sub(
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = sub nsw <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Flags should propagate.
+
+define <4 x i8> @mul(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @mul(
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = mul nuw <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Undef in shuffle mask does not necessarily propagate.
+
+define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @and(
+; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[R:%.*]] = and <4 x i8> [[CONCAT1]], [[CONCAT2]]
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = and <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Undef in shuffle mask does not necessarily propagate.
+
+define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @or(
+; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+; CHECK-NEXT: [[R:%.*]] = or <4 x i8> [[CONCAT1]], [[CONCAT2]]
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+ %r = or <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Undefs in shuffle mask do not necessarily propagate.
+
+define <4 x i8> @xor(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @xor(
+; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: [[R:%.*]] = xor <4 x i8> [[CONCAT1]], [[CONCAT2]]
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %r = xor <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+define <4 x i8> @shl(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %r = shl nuw <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+define <4 x i8> @lshr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 undef, i32 undef, i32 3>
+ %r = lshr exact <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; Extra-uses prevent the transform.
+declare void @use(<4 x i8>)
+
+define <4 x i8> @ashr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: call void @use(<4 x i8> [[CONCAT1]])
+; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[R:%.*]] = ashr <4 x i8> [[CONCAT1]], [[CONCAT2]]
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @use(<4 x i8> %concat1)
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = ashr <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; TODO: Div/rem with undef in any element of the divisor is undef, so this should be simplified away?
+
+define <4 x i8> @sdiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @sdiv(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = sdiv exact <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %r = sdiv exact <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+define <4 x i8> @srem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @srem(
+; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = srem <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = srem <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+define <4 x i8> @udiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @udiv(
+; CHECK-NEXT: [[TMP1:%.*]] = udiv exact <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = udiv exact <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = udiv exact <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+; TODO: Div/rem with undef in any element of the divisor is undef, so this should be simplified away?
+
+define <4 x i8> @urem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
+; CHECK-LABEL: @urem(
+; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x i8> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i8> [[R]]
+;
+ %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x i8> %c, <2 x i8> %d, <4 x i32> <i32 undef, i32 1, i32 2, i32 3>
+ %r = urem <4 x i8> %concat1, %concat2
+ ret <4 x i8> %r
+}
+
+define <4 x float> @fadd(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @fadd(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = fadd <4 x float> %concat1, %concat2
+ ret <4 x float> %r
+}
+
+; Fast-math-flags propagate.
+
+define <4 x float> @fsub(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @fsub(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 undef, i32 3>
+ %r = fsub fast <4 x float> %concat1, %concat2
+ ret <4 x float> %r
+}
+
+; Extra-uses prevent the transform.
+declare void @use2(<4 x float>)
+
+define <4 x float> @fmul(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @fmul(
+; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: call void @use2(<4 x float> [[CONCAT2]])
+; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x float> [[CONCAT1]], [[CONCAT2]]
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+ %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+ call void @use2(<4 x float> %concat2)
+ %r = fmul nnan <4 x float> %concat1, %concat2
+ ret <4 x float> %r
+}
+
+; Fast-math-flags propagate.
+
+define <4 x float> @fdiv(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @fdiv(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv ninf arcp <2 x float> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv ninf arcp <2 x float> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %r = fdiv ninf arcp <4 x float> %concat1, %concat2
+ ret <4 x float> %r
+}
+
+define <4 x float> @frem(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) {
+; CHECK-LABEL: @frem(
+; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+ %concat2 = shufflevector <2 x float> %c, <2 x float> %d, <4 x i32> <i32 0, i32 undef, i32 2, i32 3>
+ %r = frem <4 x float> %concat1, %concat2
+ ret <4 x float> %r
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=33026 - all of the shuffles can be eliminated.
+
+define <4 x i32> @PR33026(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
+; CHECK-LABEL: @PR33026(
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[B:%.*]], [[D:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[SUB]]
+;
+ %concat1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %concat2 = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %and = and <8 x i32> %concat1, %concat2
+ %extract1 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %extract2 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %sub = sub <4 x i32> %extract1, %extract2
+ ret <4 x i32> %sub
+}
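To make the PR33026 comment concrete, the expected output of that test corresponds to the following standalone IR (a restatement of the CHECK lines above, with an illustrative name): both concatenations and both extractions disappear because the 'and' can be applied directly to the matching halves before the subtract.

define <4 x i32> @PR33026_expected(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
  %t1 = and <4 x i32> %a, %c
  %t2 = and <4 x i32> %b, %d
  %sub = sub <4 x i32> %t1, %t2
  ret <4 x i32> %sub
}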
diff --git a/llvm/test/Transforms/InstCombine/vector-mul.ll b/llvm/test/Transforms/InstCombine/vector-mul.ll
new file mode 100644
index 00000000000..d9809281edb
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-mul.ll
@@ -0,0 +1,445 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check that instcombine rewrites a multiply by a vector of known
+; constant power-of-2 elements into a vector shift.
+
+define <4 x i8> @Zero_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @Zero_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i8> zeroinitializer
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 0, i8 0, i8 0, i8 0>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @Identity_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @Identity_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i8> [[INVEC:%.*]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 1, i8 1, i8 1, i8 1>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @AddToSelf_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @SplatPow2Test1_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 4, i8 4>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @SplatPow2Test2_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 8, i8 8, i8 8, i8 8>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @MulTest1_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @MulTest1_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 0, i8 1, i8 2, i8 3>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 1, i8 2, i8 4, i8 8>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @MulTest2_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @MulTest2_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 3, i8 3, i8 3, i8 3>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @MulTest3_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @MulTest3_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 2, i8 2, i8 1, i8 1>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 2, i8 2>
+ ret <4 x i8> %mul
+}
+
+define <4 x i8> @MulTest4_i8(<4 x i8> %InVec) {
+; CHECK-LABEL: @MulTest4_i8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], <i8 4, i8 4, i8 0, i8 1>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i8> %InVec, <i8 4, i8 4, i8 0, i8 1>
+ ret <4 x i8> %mul
+}
+
+define <4 x i16> @Zero_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @Zero_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i16> zeroinitializer
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @Identity_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @Identity_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i16> [[INVEC:%.*]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @AddToSelf_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @SplatPow2Test1_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 4, i16 4>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @SplatPow2Test2_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 8, i16 8, i16 8, i16 8>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @MulTest1_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @MulTest1_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 0, i16 1, i16 2, i16 3>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 1, i16 2, i16 4, i16 8>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @MulTest2_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @MulTest2_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], <i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 3, i16 3, i16 3, i16 3>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @MulTest3_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @MulTest3_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 2, i16 2, i16 1, i16 1>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 2, i16 2>
+ ret <4 x i16> %mul
+}
+
+define <4 x i16> @MulTest4_i16(<4 x i16> %InVec) {
+; CHECK-LABEL: @MulTest4_i16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], <i16 4, i16 4, i16 0, i16 2>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i16> %InVec, <i16 4, i16 4, i16 0, i16 2>
+ ret <4 x i16> %mul
+}
+
+define <4 x i32> @Zero_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @Zero_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @Identity_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @Identity_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i32> [[INVEC:%.*]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @AddToSelf_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @SplatPow2Test1_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 4, i32 4>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @SplatPow2Test2_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 8, i32 8, i32 8, i32 8>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @MulTest1_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @MulTest1_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 1, i32 2, i32 4, i32 8>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @MulTest2_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @MulTest2_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @MulTest3_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @MulTest3_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 2, i32 2, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 2, i32 2>
+ ret <4 x i32> %mul
+}
+
+define <4 x i32> @MulTest4_i32(<4 x i32> %InVec) {
+; CHECK-LABEL: @MulTest4_i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], <i32 4, i32 4, i32 0, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i32> %InVec, <i32 4, i32 4, i32 0, i32 1>
+ ret <4 x i32> %mul
+}
+
+define <4 x i64> @Zero_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @Zero_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @Identity_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @Identity_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <4 x i64> [[INVEC:%.*]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @AddToSelf_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @SplatPow2Test1_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 4, i64 4>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @SplatPow2Test2_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 8, i64 8, i64 8, i64 8>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @MulTest1_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @MulTest1_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 1, i64 2, i64 4, i64 8>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @MulTest2_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @MulTest2_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 3, i64 3, i64 3, i64 3>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @MulTest3_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @MulTest3_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 2, i64 2, i64 1, i64 1>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 2, i64 2>
+ ret <4 x i64> %mul
+}
+
+define <4 x i64> @MulTest4_i64(<4 x i64> %InVec) {
+; CHECK-LABEL: @MulTest4_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], <i64 4, i64 4, i64 0, i64 1>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %mul = mul <4 x i64> %InVec, <i64 4, i64 4, i64 0, i64 1>
+ ret <4 x i64> %mul
+}
+
+; Test that the following rewrite rule also works with vectors
+; of integers:
+; ((X << C1)*C2) == (X * (C2 << C1))
+
+define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) {
+; CHECK-LABEL: @ShiftMulTest1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], <i8 12, i8 12, i8 12, i8 12>
+; CHECK-NEXT: ret <4 x i8> [[MUL]]
+;
+entry:
+ %shl = shl <4 x i8> %InVec, <i8 2, i8 2, i8 2, i8 2>
+ %mul = mul <4 x i8> %shl, <i8 3, i8 3, i8 3, i8 3>
+ ret <4 x i8> %mul
+}
+
+define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) {
+; CHECK-LABEL: @ShiftMulTest2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], <i16 12, i16 12, i16 12, i16 12>
+; CHECK-NEXT: ret <4 x i16> [[MUL]]
+;
+entry:
+ %shl = shl <4 x i16> %InVec, <i16 2, i16 2, i16 2, i16 2>
+ %mul = mul <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3>
+ ret <4 x i16> %mul
+}
+
+define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) {
+; CHECK-LABEL: @ShiftMulTest3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], <i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT: ret <4 x i32> [[MUL]]
+;
+entry:
+ %shl = shl <4 x i32> %InVec, <i32 2, i32 2, i32 2, i32 2>
+ %mul = mul <4 x i32> %shl, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %mul
+}
+
+define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) {
+; CHECK-LABEL: @ShiftMulTest4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT: ret <4 x i64> [[MUL]]
+;
+entry:
+ %shl = shl <4 x i64> %InVec, <i64 2, i64 2, i64 2, i64 2>
+ %mul = mul <4 x i64> %shl, <i64 3, i64 3, i64 3, i64 3>
+ ret <4 x i64> %mul
+}
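A scalar sketch of the shl+mul rule stated above, assuming -instcombine applies the same fold outside of vectors (the function name is illustrative): with C1 = 2 and C2 = 3, (X << 2) * 3 becomes X * (3 << 2) = X * 12, matching the vector checks above.

define i8 @shl_mul_scalar_sketch(i8 %x) {
  ; expected to fold to: mul i8 %x, 12
  %shl = shl i8 %x, 2
  %mul = mul i8 %shl, 3
  ret i8 %mul
}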
diff --git a/llvm/test/Transforms/InstCombine/vector-type.ll b/llvm/test/Transforms/InstCombine/vector-type.ll
new file mode 100644
index 00000000000..59a4bdd19e7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-type.ll
@@ -0,0 +1,15 @@
+; The code in InstCombiner::FoldSelectOpOp was calling
+; Type::getVectorNumElements without checking first if the type was a vector.
+
+; RUN: opt < %s -instcombine -S
+
+define i32 @vselect1(i32 %a.coerce, i32 %b.coerce, i32 %c.coerce) {
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = bitcast i32 %c.coerce to <2 x i16>
+ %cmp = icmp sge <2 x i16> %2, zeroinitializer
+ %or = select <2 x i1> %cmp, <2 x i16> %0, <2 x i16> %1
+ %3 = bitcast <2 x i16> %or to i32
+ ret i32 %3
+}
diff --git a/llvm/test/Transforms/InstCombine/vector-udiv.ll b/llvm/test/Transforms/InstCombine/vector-udiv.ll
new file mode 100644
index 00000000000..e16c93265ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-udiv.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_splatconst_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = udiv <4 x i32> %a0, <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_const_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = udiv <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 8>
+ ret <4 x i32> %1
+}
+
+; X udiv C, where C >= signbit
+define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconstsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = udiv <4 x i32> %a0, <i32 -3, i32 -3, i32 -3, i32 -3>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_negconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconst(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], <i32 -4, i32 -6, i32 -8, i32 -10>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = udiv <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 -9>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconst_undef(
+; CHECK-NEXT: ret <4 x i32> undef
+;
+ %1 = udiv <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 undef>
+ ret <4 x i32> %1
+}
+
+; X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @test_v4i32_shl_splatconst_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = shl <4 x i32> <i32 4, i32 4, i32 4, i32 4>, %a1
+ %2 = udiv <4 x i32> %a0, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_shl_const_pow2(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @test_v4i32_shl_const_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = shl <4 x i32> <i32 4, i32 8, i32 16, i32 32>, %a1
+ %2 = udiv <4 x i32> %a0, %1
+ ret <4 x i32> %2
+}
+
+; X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2)
+define <4 x i32> @test_v4i32_zext_shl_splatconst_pow2(<4 x i32> %a0, <4 x i16> %a1) {
+; CHECK-LABEL: @test_v4i32_zext_shl_splatconst_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], <i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %1 = shl <4 x i16> <i16 4, i16 4, i16 4, i16 4>, %a1
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = udiv <4 x i32> %a0, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @test_v4i32_zext_shl_const_pow2(<4 x i32> %a0, <4 x i16> %a1) {
+; CHECK-LABEL: @test_v4i32_zext_shl_const_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], <i16 2, i16 3, i16 4, i16 5>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %1 = shl <4 x i16> <i16 4, i16 8, i16 16, i16 32>, %a1
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = udiv <4 x i32> %a0, %2
+ ret <4 x i32> %3
+}
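The negconstsplat/negconst tests above rely on the rule noted before them: when the divisor constant has its sign bit set (C is at least half the unsigned range), X udiv C can only be 0 or 1, and it is 1 exactly when X > C - 1. A scalar sketch under that assumption (the function name is illustrative): with C = -3, i.e. 4294967293 unsigned, the expected fold is zext(icmp ugt %x, -4).

define i32 @udiv_negconst_scalar_sketch(i32 %x) {
  ; only x in {-3, -2, -1} (unsigned 4294967293..4294967295) gives a quotient of 1
  %d = udiv i32 %x, -3
  ret i32 %d
}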
diff --git a/llvm/test/Transforms/InstCombine/vector-urem.ll b/llvm/test/Transforms/InstCombine/vector-urem.ll
new file mode 100644
index 00000000000..113451f8469
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-urem.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_splatconst_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = urem <4 x i32> %a0, <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_const_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], <i32 0, i32 1, i32 3, i32 7>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = urem <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 8>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_const_pow2_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_const_pow2_undef(
+; CHECK-NEXT: ret <4 x i32> undef
+;
+ %1 = urem <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 undef>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_one(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_one(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = urem <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %a0
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_one_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_one_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 undef>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = urem <4 x i32> <i32 1, i32 1, i32 1, i32 undef>, %a0
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconstsplat(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -3, i32 -3, i32 -3>
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %1 = urem <4 x i32> %a0, <i32 -3, i32 -3, i32 -3, i32 -3>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_negconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconst(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[A0:%.*]], <i32 -3, i32 -5, i32 -7, i32 -9>
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A0]], <i32 3, i32 5, i32 7, i32 9>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 -9>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_negconst_undef(
+; CHECK-NEXT: ret <4 x i32> undef
+;
+ %1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 undef>
+ ret <4 x i32> %1
+}
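Similarly, the 'one' tests above depend on a fact worth spelling out: 1 urem X is 0 when X is 1 and 1 for any other non-zero X (division by zero is undefined), so the remainder reduces to zext(icmp ne X, 1). A scalar sketch, assuming the scalar fold matches the vector checks above (the function name is illustrative):

define i32 @one_urem_scalar_sketch(i32 %x) {
  ; expected to become: %c = icmp ne i32 %x, 1 ; %r = zext i1 %c to i32
  %r = urem i32 1, %x
  ret i32 %r
}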
diff --git a/llvm/test/Transforms/InstCombine/vector-xor.ll b/llvm/test/Transforms/InstCombine/vector-xor.ll
new file mode 100644
index 00000000000..c10e56a6561
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-xor.ll
@@ -0,0 +1,281 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (A&B)^(A&C) -> A&(B^C) etc
+
+define <4 x i32> @test_v4i32_xor_repeated_and_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @test_v4i32_xor_repeated_and_0(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = and <4 x i32> %a, %b
+ %2 = and <4 x i32> %a, %c
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @test_v4i32_xor_repeated_and_1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @test_v4i32_xor_repeated_and_1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = and <4 x i32> %a, %b
+ %2 = and <4 x i32> %c, %a
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+; xor(bswap(a), c) to bswap(xor(a, bswap(c)))
+
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+
+define <4 x i32> @test_v4i32_xor_bswap_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_bswap_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0)
+ %2 = xor <4 x i32> %1, <i32 255, i32 255, i32 255, i32 255>
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_bswap_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_bswap_const(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[A0:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 0, i32 -16777216, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0)
+ %2 = xor <4 x i32> %1, <i32 0, i32 -16777216, i32 2, i32 3>
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_bswap_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_bswap_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[A0:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 undef, i32 0, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0)
+ %2 = xor <4 x i32> %1, <i32 undef, i32 0, i32 2, i32 3>
+ ret <4 x i32> %2
+}
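+
+; In the splat test the constant is byte-swapped at compile time:
+; bswap(0x000000FF) == 0xFF000000 == -16777216, so the xor can be performed
+; before the bswap using the pre-swapped constant, as the checks expect. With
+; the non-splat constants the tests show the bswap/xor order left as written.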
+
+; DeMorgan's Law: ~(~X & Y) --> (X | ~Y)
+
+define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_v4i32_demorgan_and(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Y_NOT]], [[X:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %x
+ %2 = and <4 x i32> %1, %y
+ %3 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %2
+ ret <4 x i32> %3
+}
+
+; DeMorgan's Law: ~(~X | Y) --> (X & ~Y)
+
+define <4 x i32> @test_v4i32_demorgan_or(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_v4i32_demorgan_or(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y_NOT]], [[X:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %x
+ %2 = or <4 x i32> %1, %y
+ %3 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %2
+ ret <4 x i32> %3
+}
+
+; ~(~X >>s Y) --> (X >>s Y)
+
+define <4 x i32> @test_v4i32_not_ashr_not(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_v4i32_not_ashr_not(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %x
+ %2 = ashr <4 x i32> %1, %y
+ %3 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @test_v4i32_not_ashr_not_undef(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_v4i32_not_ashr_not_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %x
+ %2 = ashr <4 x i32> %1, %y
+ %3 = xor <4 x i32> <i32 -1, i32 -1, i32 undef, i32 -1>, %2
+ ret <4 x i32> %3
+}
+
+; ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
+
+define <4 x i32> @test_v4i32_not_ashr_negative_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_ashr_negative_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 2, i32 2, i32 2, i32 2>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = ashr <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_ashr_negative_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_ashr_negative_const(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 2, i32 4, i32 6, i32 8>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = ashr <4 x i32> <i32 -3, i32 -5, i32 -7, i32 -9>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_ashr_negative_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_ashr_negative_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 2, i32 4, i32 undef, i32 8>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = ashr <4 x i32> <i32 -3, i32 -5, i32 undef, i32 -9>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1
+ ret <4 x i32> %2
+}
+
+; ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
+
+define <4 x i32> @test_v4i32_not_lshr_nonnegative_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> <i32 -4, i32 -4, i32 -4, i32 -4>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = lshr <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_lshr_nonnegative_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_const(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> <i32 -4, i32 -6, i32 -8, i32 -10>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = lshr <4 x i32> <i32 3, i32 5, i32 7, i32 9>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> <i32 -4, i32 -6, i32 undef, i32 -10>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = lshr <4 x i32> <i32 3, i32 5, i32 undef, i32 9>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1
+ ret <4 x i32> %2
+}
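+
+; Worked example for the two shift groups above: a non-negative constant such
+; as 3 shifts in zero bits with lshr, so its complement equals the negative
+; constant -4 shifted with ashr, which shifts in one bits:
+; ~(3 >>u Y) == (~3) >>s Y == -4 >>s Y, and symmetrically ~(-3 >>s Y) == 2 >>u Y.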
+
+; ~(C-X) == X-C-1 == X+(-C-1)
+
+define <4 x i32> @test_v4i32_not_sub_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_sub_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_sub_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_sub_const(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 -6, i32 0, i32 -16>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 5, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_not_sub_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_not_sub_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 undef, i32 0, i32 -16>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1
+ ret <4 x i32> %2
+}
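+
+; Illustration with the splat constant C = 3: ~(3 - X) == X - 3 - 1 == X + (-4),
+; matching the add of -4 in the checks; for the mixed constants <3, 5, -1, 15>
+; the folded addends are likewise -C-1, i.e. <-4, -6, 0, -16>.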
+
+; (C - X) ^ signmask -> (C + signmask - X)
+
+define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_sub_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 -2147483645, i32 -2147483645, i32 -2147483645, i32 -2147483645>, [[A0:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_signmask_sub_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 3, i32 5, i32 -1, i32 15>, [[A0:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 5, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_signmask_sub_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, [[A0:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>, %1
+ ret <4 x i32> %2
+}
+
+; (X + C) ^ signmask -> (X + C + signmask)
+
+define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_add_splatconst(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -2147483645, i32 -2147483645, i32 -2147483645, i32 -2147483645>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_signmask_add_const(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_add_const(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 3, i32 5, i32 -1, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = add <4 x i32> <i32 3, i32 5, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %1
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @test_v4i32_xor_signmask_add_const_undef(<4 x i32> %a0) {
+; CHECK-LABEL: @test_v4i32_xor_signmask_add_const_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 3, i32 undef, i32 -1, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = add <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0
+ %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>, %1
+ ret <4 x i32> %2
+}
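+
+; For the signmask folds above, xor with 0x80000000 is the same as adding
+; 0x80000000, so in the splat cases the two constants combine, e.g.
+; 3 + (-2147483648) == -2147483645, the single constant the checks expect.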
diff --git a/llvm/test/Transforms/InstCombine/vector_gep1.ll b/llvm/test/Transforms/InstCombine/vector_gep1.ll
new file mode 100644
index 00000000000..8e5bcf963ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector_gep1.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@G1 = global i8 zeroinitializer
+
+define <2 x i1> @test(<2 x i8*> %a, <2 x i8*> %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8*> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %c = icmp eq <2 x i8*> %a, %b
+ ret <2 x i1> %c
+}
+
+define <2 x i1> @test2(<2 x i8*> %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %c = inttoptr <2 x i32> <i32 1, i32 2> to <2 x i8*>
+ %d = icmp ult <2 x i8*> %c, zeroinitializer
+ ret <2 x i1> %d
+}
+
+define <2 x i1> @test3(<2 x i8*> %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %g = getelementptr i8, <2 x i8*> %a, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <1 x i1> @test4(<1 x i8*> %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret <1 x i1> zeroinitializer
+;
+ %g = getelementptr i8, <1 x i8*> %a, <1 x i32> <i32 1>
+ %B = icmp ult <1 x i8*> %g, zeroinitializer
+ ret <1 x i1> %B
+}
+
+define <2 x i1> @test5(<2 x i8*> %a) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %w = getelementptr i8, <2 x i8*> %a, <2 x i32> zeroinitializer
+ %e = getelementptr i8, <2 x i8*> %w, <2 x i32> <i32 5, i32 9>
+ %g = getelementptr i8, <2 x i8*> %e, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x { i32, i32 }*> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32*> [[W]]
+;
+ %w = getelementptr {i32, i32}, <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+ ret <2 x i32*> %w
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vector_gep2.ll b/llvm/test/Transforms/InstCombine/vector_gep2.ll
new file mode 100644
index 00000000000..dcbcf0c4000
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector_gep2.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <2 x i8*> @testa(<2 x i8*> %a) {
+; CHECK-LABEL: @testa(
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, <2 x i8*> [[A:%.*]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: ret <2 x i8*> [[G]]
+;
+ %g = getelementptr i8, <2 x i8*> %a, <2 x i32> <i32 0, i32 1>
+ ret <2 x i8*> %g
+}
+
+define <8 x double*> @vgep_s_v8i64(double* %a, <8 x i64>%i) {
+; CHECK-LABEL: @vgep_s_v8i64(
+; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr double, double* [[A:%.*]], <8 x i64> [[I:%.*]]
+; CHECK-NEXT: ret <8 x double*> [[VECTORGEP]]
+;
+ %VectorGep = getelementptr double, double* %a, <8 x i64> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x double*> @vgep_s_v8i32(double* %a, <8 x i32>%i) {
+; CHECK-LABEL: @vgep_s_v8i32(
+; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i32> [[I:%.*]] to <8 x i64>
+; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr double, double* [[A:%.*]], <8 x i64> [[TMP1]]
+; CHECK-NEXT: ret <8 x double*> [[VECTORGEP]]
+;
+ %VectorGep = getelementptr double, double* %a, <8 x i32> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x i8*> @vgep_v8iPtr_i32(<8 x i8*> %a, i32 %i) {
+; CHECK-LABEL: @vgep_v8iPtr_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr i8, <8 x i8*> [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: ret <8 x i8*> [[VECTORGEP]]
+;
+ %VectorGep = getelementptr i8, <8 x i8*> %a, i32 %i
+ ret <8 x i8*> %VectorGep
+}
+
diff --git a/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
new file mode 100644
index 00000000000..e5da6086319
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; insertelements should fold to shuffle
+define <4 x float> @foo(<4 x float> %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: ret <4 x float> [[INS2]]
+;
+ %ins1 = insertelement<4 x float> %x, float 1.0, i32 1
+ %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
+ ret <4 x float> %ins2
+}
+
+; Insert of a constant is canonicalized ahead of insert of a variable.
+
+define <4 x float> @bar(<4 x float> %x, float %a) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %x, float 2.000000e+00, i32 2
+; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 1
+; CHECK-NEXT: ret <4 x float> [[INS2]]
+;
+ %ins1 = insertelement<4 x float> %x, float %a, i32 1
+ %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
+ ret <4 x float> %ins2
+}
+
+define <4 x float> @baz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @baz(
+; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> %x, float 1.000000e+00, i32 1
+; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 2.000000e+00, i32 %a
+; CHECK-NEXT: ret <4 x float> [[INS2]]
+;
+ %ins1 = insertelement<4 x float> %x, float 1.0, i32 1
+ %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 %a
+ ret <4 x float> %ins2
+}
+
+; insertelements should fold to shuffle
+define <4 x float> @bazz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @bazz(
+; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x float> %x, float 1.000000e+00, i32 3
+; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 5.000000e+00, i32 %a
+; CHECK-NEXT: [[INS5:%.*]] = shufflevector <4 x float> [[INS2]], <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK-NEXT: [[INS6:%.*]] = insertelement <4 x float> [[INS5]], float 7.000000e+00, i32 %a
+; CHECK-NEXT: ret <4 x float> [[INS6]]
+;
+ %ins1 = insertelement<4 x float> %x, float 1.0, i32 3
+ %ins2 = insertelement<4 x float> %ins1, float 5.0, i32 %a
+ %ins3 = insertelement<4 x float> %ins2, float 3.0, i32 2
+ %ins4 = insertelement<4 x float> %ins3, float 1.0, i32 1
+ %ins5 = insertelement<4 x float> %ins4, float 2.0, i32 2
+ %ins6 = insertelement<4 x float> %ins5, float 7.0, i32 %a
+ ret <4 x float> %ins6
+}
+
+; Out of bounds index folds to undef
+define <4 x float> @bazzz(<4 x float> %x) {
+; CHECK-LABEL: @bazzz(
+; CHECK-NEXT: ret <4 x float> <float undef, float undef, float 2.000000e+00, float undef>
+;
+ %ins1 = insertelement<4 x float> %x, float 1.0, i32 5
+ %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
+ ret <4 x float> %ins2
+}
+
+define <4 x float> @bazzzz(<4 x float> %x) {
+; CHECK-LABEL: @bazzzz(
+; CHECK-NEXT: ret <4 x float> <float undef, float undef, float 2.000000e+00, float undef>
+;
+ %ins1 = insertelement<4 x float> %x, float 1.0, i32 undef
+ %ins2 = insertelement<4 x float> %ins1, float 2.0, i32 2
+ ret <4 x float> %ins2
+}
+
+define <4 x float> @bazzzzz() {
+; CHECK-LABEL: @bazzzzz(
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 5.000000e+00, float 1.000000e+01, float 4.000000e+00>
+;
+ %ins1 = insertelement <4 x float> insertelement (<4 x float> <float 1.0, float 2.0, float 3.0, float undef>, float 4.0, i32 3), float 5.0, i32 1
+ %ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2
+ ret <4 x float> %ins2
+}
+
+define <4 x float> @bazzzzzz(<4 x float> %x, i32 %a) {
+; CHECK-LABEL: @bazzzzzz(
+; CHECK-NEXT: ret <4 x float> <float undef, float 5.000000e+00, float undef, float 4.000000e+00>
+;
+ %ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0> , <4 x i32> <i32 0, i32 5, i32 undef, i32 6> ), float 4.0, i32 3), float 5.0, i32 1
+ ret <4 x float> %ins1
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/volatile_store.ll b/llvm/test/Transforms/InstCombine/volatile_store.ll
new file mode 100644
index 00000000000..c2f63d6659f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/volatile_store.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@x = weak global i32 0
+
+define void @self_assign_1() {
+; CHECK-LABEL: @self_assign_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = load volatile i32, i32* @x, align 4
+; CHECK-NEXT: store volatile i32 [[TMP]], i32* @x, align 4
+; CHECK-NEXT: br label %return
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp = load volatile i32, i32* @x
+ store volatile i32 %tmp, i32* @x
+ br label %return
+
+return:
+ ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll
new file mode 100644
index 00000000000..1139048c706
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the wcslen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i64 @wcslen(i32*)
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!llvm.module.flags = !{!0}
+
+@hello = constant [6 x i32] [i32 104, i32 101, i32 108, i32 108, i32 111, i32 0]
+@longer = constant [7 x i32] [i32 108, i32 111, i32 110, i32 103, i32 101, i32 114, i32 0]
+@null = constant [1 x i32] zeroinitializer
+@null_hello = constant [7 x i32] [i32 0, i32 104, i32 101, i32 108, i32 108, i32 111, i32 0]
+@nullstring = constant i32 0
+@a = common global [32 x i32] zeroinitializer, align 1
+@null_hello_mid = constant [13 x i32] [i32 104, i32 101, i32 108, i32 108, i32 111, i32 32, i32 119, i32 111, i32 114, i32 0, i32 108, i32 100, i32 0]
+
+define i64 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i64 5
+;
+ %hello_p = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i64 0
+;
+ %null_p = getelementptr [1 x i32], [1 x i32]* @null, i64 0, i64 0
+ %null_l = call i64 @wcslen(i32* %null_p)
+ ret i64 %null_l
+}
+
+define i64 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: ret i64 0
+;
+ %null_hello_p = getelementptr [7 x i32], [7 x i32]* @null_hello, i64 0, i64 0
+ %null_hello_l = call i64 @wcslen(i32* %null_hello_p)
+ ret i64 %null_hello_l
+}
+
+define i64 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i64 0
+;
+ %len = tail call i64 @wcslen(i32* @nullstring) nounwind
+ ret i64 %len
+}
+
+; Check wcslen(x) == 0 --> *x == 0.
+
+define i1 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i1 false
+;
+ %hello_p = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ %eq_hello = icmp eq i64 %hello_l, 0
+ ret i1 %eq_hello
+}
+
+define i1 @test_simplify6(i32* %str_p) {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
+; CHECK-NEXT: [[EQ_NULL:%.*]] = icmp eq i32 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[EQ_NULL]]
+;
+ %str_l = call i64 @wcslen(i32* %str_p)
+ %eq_null = icmp eq i64 %str_l, 0
+ ret i1 %eq_null
+}
+
+; Check wcslen(x) != 0 --> *x != 0.
+
+define i1 @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: ret i1 true
+;
+ %hello_p = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ %ne_hello = icmp ne i64 %hello_l, 0
+ ret i1 %ne_hello
+}
+
+define i1 @test_simplify8(i32* %str_p) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
+; CHECK-NEXT: [[NE_NULL:%.*]] = icmp ne i32 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[NE_NULL]]
+;
+ %str_l = call i64 @wcslen(i32* %str_p)
+ %ne_null = icmp ne i64 %str_l, 0
+ ret i1 %ne_null
+}
+
+define i64 @test_simplify9(i1 %x) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i64 5, i64 6
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %hello = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %longer = getelementptr [7 x i32], [7 x i32]* @longer, i64 0, i64 0
+ %s = select i1 %x, i32* %hello, i32* %longer
+ %l = call i64 @wcslen(i32* %s)
+ ret i64 %l
+}
+
+; Check the case that should be simplified to a sub instruction.
+; wcslen(@hello + x) --> 5 - x
+
+define i64 @test_simplify10(i32 %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i64 5, [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %hello_p = getelementptr inbounds [6 x i32], [6 x i32]* @hello, i32 0, i32 %x
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
+
+; wcslen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
+
+define i64 @test_simplify11(i32 %x) {
+; CHECK-LABEL: @test_simplify11(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT: [[NARROW:%.*]] = sub nuw nsw i32 9, [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %and = and i32 %x, 7
+ %hello_p = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
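+
+; The constant 9 comes from @null_hello_mid, whose first zero element is at
+; index 9; (x & 7) is at most 7, so the offset stays inside that leading run
+; and the length is 9 - (x & 7).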
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[A_L:%.*]] = call i64 @wcslen(i32* getelementptr inbounds ([32 x i32], [32 x i32]* @a, i64 0, i64 0))
+; CHECK-NEXT: ret i64 [[A_L]]
+;
+ %a_p = getelementptr [32 x i32], [32 x i32]* @a, i64 0, i64 0
+ %a_l = call i64 @wcslen(i32* %a_p)
+ ret i64 %a_l
+}
+
+; wcslen(@null_hello + x) should not be simplified to a sub instruction.
+
+define i64 @test_no_simplify2(i32 %x) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @null_hello, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i32* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i32], [7 x i32]* @null_hello, i32 0, i32 %x
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
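+
+; No sub fold is possible here because @null_hello starts with a zero element:
+; wcslen(@null_hello + 0) is 0 but wcslen(@null_hello + 1) is 5, so the result
+; is not simply N - x.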
+
+define i64 @test_no_simplify2_no_null_opt(i32 %x) #0 {
+; CHECK-LABEL: @test_no_simplify2_no_null_opt(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @null_hello, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i32* [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i32], [7 x i32]* @null_hello, i32 0, i32 %x
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
+
+; wcslen(@null_hello_mid + (x & 15)) should not be simplified to a sub instruction.
+
+define i64 @test_no_simplify3(i32 %x) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i32* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %and = and i32 %x, 15
+ %hello_p = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
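+
+; Likewise (x & 15) can index past the first zero of @null_hello_mid (index 9);
+; at offset 10, for example, the length is 2 rather than 9 - 10, so no sub fold
+; applies.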
+
+define i64 @test_no_simplify3_no_null_opt(i32 %x) #0 {
+; CHECK-LABEL: @test_no_simplify3_no_null_opt(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[AND]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i32* [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %and = and i32 %x, 15
+ %hello_p = getelementptr inbounds [13 x i32], [13 x i32]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
+
+@str16 = constant [1 x i16] [i16 0]
+
+define i64 @test_no_simplify4() {
+; CHECK-LABEL: @test_no_simplify4(
+; CHECK-NEXT: [[L:%.*]] = call i64 @wcslen(i32* bitcast ([1 x i16]* @str16 to i32*))
+; CHECK-NEXT: ret i64 [[L]]
+;
+ %l = call i64 @wcslen(i32* bitcast ([1 x i16]* @str16 to i32*))
+ ret i64 %l
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/InstCombine/wcslen-2.ll b/llvm/test/Transforms/InstCombine/wcslen-2.ll
new file mode 100644
index 00000000000..15f1fdbb1ad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/wcslen-2.ll
@@ -0,0 +1,21 @@
+; Test that the wcslen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!llvm.module.flags = !{!0}
+
+@hello = constant [6 x i32] [i32 104, i32 101, i32 108, i32 108, i32 111, i32 0]
+
+declare i64 @wcslen(i32*, i32)
+
+define i64 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+ %hello_p = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i32* %hello_p, i32 187)
+; CHECK-NEXT: %hello_l = call i64 @wcslen
+ ret i64 %hello_l
+; CHECK-NEXT: ret i64 %hello_l
+}
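+
+; The declaration above deliberately does not match the real wcslen prototype
+; (it takes an extra i32 argument), so the simplifier should treat the call as
+; an unrelated function and leave it untouched, which is what the checks verify.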
diff --git a/llvm/test/Transforms/InstCombine/wcslen-3.ll b/llvm/test/Transforms/InstCombine/wcslen-3.ll
new file mode 100644
index 00000000000..e789442f490
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/wcslen-3.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the wcslen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test behavior for wchar_size==2
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"wchar_size", i32 2}
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i64 @wcslen(i16*)
+
+@hello = constant [6 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 0]
+@longer = constant [7 x i16] [i16 108, i16 111, i16 110, i16 103, i16 101, i16 114, i16 0]
+@null = constant [1 x i16] zeroinitializer
+@null_hello = constant [7 x i16] [i16 0, i16 104, i16 101, i16 108, i16 108, i16 111, i16 0]
+@nullstring = constant i16 0
+@a = common global [32 x i16] zeroinitializer, align 1
+@null_hello_mid = constant [13 x i16] [i16 104, i16 101, i16 108, i16 108, i16 111, i16 32, i16 119, i16 111, i16 114, i16 0, i16 108, i16 100, i16 0]
+
+define i64 @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i64 5
+;
+ %hello_p = getelementptr [6 x i16], [6 x i16]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i64 0
+;
+ %null_p = getelementptr [1 x i16], [1 x i16]* @null, i64 0, i64 0
+ %null_l = call i64 @wcslen(i16* %null_p)
+ ret i64 %null_l
+}
+
+define i64 @test_simplify3() {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: ret i64 0
+;
+ %null_hello_p = getelementptr [7 x i16], [7 x i16]* @null_hello, i64 0, i64 0
+ %null_hello_l = call i64 @wcslen(i16* %null_hello_p)
+ ret i64 %null_hello_l
+}
+
+define i64 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i64 0
+;
+ %len = tail call i64 @wcslen(i16* @nullstring) nounwind
+ ret i64 %len
+}
+
+; Check wcslen(x) == 0 --> *x == 0.
+
+define i1 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i1 false
+;
+ %hello_p = getelementptr [6 x i16], [6 x i16]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ %eq_hello = icmp eq i64 %hello_l, 0
+ ret i1 %eq_hello
+}
+
+define i1 @test_simplify6(i16* %str_p) {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i16, i16* [[STR_P:%.*]], align 2
+; CHECK-NEXT: [[EQ_NULL:%.*]] = icmp eq i16 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[EQ_NULL]]
+;
+ %str_l = call i64 @wcslen(i16* %str_p)
+ %eq_null = icmp eq i64 %str_l, 0
+ ret i1 %eq_null
+}
+
+; Check wcslen(x) != 0 --> *x != 0.
+
+define i1 @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: ret i1 true
+;
+ %hello_p = getelementptr [6 x i16], [6 x i16]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ %ne_hello = icmp ne i64 %hello_l, 0
+ ret i1 %ne_hello
+}
+
+define i1 @test_simplify8(i16* %str_p) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[STRLENFIRST:%.*]] = load i16, i16* [[STR_P:%.*]], align 2
+; CHECK-NEXT: [[NE_NULL:%.*]] = icmp ne i16 [[STRLENFIRST]], 0
+; CHECK-NEXT: ret i1 [[NE_NULL]]
+;
+ %str_l = call i64 @wcslen(i16* %str_p)
+ %ne_null = icmp ne i64 %str_l, 0
+ ret i1 %ne_null
+}
+
+define i64 @test_simplify9(i1 %x) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[X:%.*]], i64 5, i64 6
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %hello = getelementptr [6 x i16], [6 x i16]* @hello, i64 0, i64 0
+ %longer = getelementptr [7 x i16], [7 x i16]* @longer, i64 0, i64 0
+ %s = select i1 %x, i16* %hello, i16* %longer
+ %l = call i64 @wcslen(i16* %s)
+ ret i64 %l
+}
+
+; Check the case that should be simplified to a sub instruction.
+; wcslen(@hello + x) --> 5 - x
+
+define i64 @test_simplify10(i16 %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i64 5, [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %hello_p = getelementptr inbounds [6 x i16], [6 x i16]* @hello, i16 0, i16 %x
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ ret i64 %hello_l
+}
+
+; wcslen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
+
+define i64 @test_simplify11(i16 %x) {
+; CHECK-LABEL: @test_simplify11(
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[X:%.*]], 7
+; CHECK-NEXT: [[NARROW:%.*]] = sub nuw nsw i16 9, [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+ %and = and i16 %x, 7
+ %hello_p = getelementptr inbounds [13 x i16], [13 x i16]* @null_hello_mid, i16 0, i16 %and
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ ret i64 %hello_l
+}
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[A_L:%.*]] = call i64 @wcslen(i16* getelementptr inbounds ([32 x i16], [32 x i16]* @a, i64 0, i64 0))
+; CHECK-NEXT: ret i64 [[A_L]]
+;
+ %a_p = getelementptr [32 x i16], [32 x i16]* @a, i64 0, i64 0
+ %a_l = call i64 @wcslen(i16* %a_p)
+ ret i64 %a_l
+}
+
+; wcslen(@null_hello + x) should not be simplified to a sub instruction.
+
+define i64 @test_no_simplify2(i16 %x) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i16], [7 x i16]* @null_hello, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i16* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i16], [7 x i16]* @null_hello, i16 0, i16 %x
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ ret i64 %hello_l
+}
+
+; wcslen(@null_hello_mid + (x & 15)) should not be simplified to a sub instruction.
+
+define i64 @test_no_simplify3(i16 %x) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[X:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[AND]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i16], [13 x i16]* @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(i16* nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %and = and i16 %x, 15
+ %hello_p = getelementptr inbounds [13 x i16], [13 x i16]* @null_hello_mid, i16 0, i16 %and
+ %hello_l = call i64 @wcslen(i16* %hello_p)
+ ret i64 %hello_l
+}
+
+@str32 = constant [1 x i32] [i32 0]
+
+; This could in principle be simplified, but the current implementation bails on
+; type mismatches.
+define i64 @test_no_simplify4() {
+; CHECK-LABEL: @test_no_simplify4(
+; CHECK-NEXT: [[L:%.*]] = call i64 @wcslen(i16* bitcast ([1 x i32]* @str32 to i16*))
+; CHECK-NEXT: ret i64 [[L]]
+;
+ %l = call i64 @wcslen(i16* bitcast ([1 x i32]* @str32 to i16*))
+ ret i64 %l
+}
diff --git a/llvm/test/Transforms/InstCombine/wcslen-4.ll b/llvm/test/Transforms/InstCombine/wcslen-4.ll
new file mode 100644
index 00000000000..07832288965
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/wcslen-4.ll
@@ -0,0 +1,20 @@
+; Test that the wcslen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Without the wchar_size metadata we should see no optimization happening.
+
+@hello = constant [6 x i32] [i32 104, i32 101, i32 108, i32 108, i32 111, i32 0]
+
+declare i64 @wcslen(i32*)
+
+define i64 @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: %hello_l = call i64 @wcslen(i32* getelementptr inbounds ([6 x i32], [6 x i32]* @hello, i64 0, i64 0))
+; CHECK-NEXT: ret i64 %hello_l
+ %hello_p = getelementptr [6 x i32], [6 x i32]* @hello, i64 0, i64 0
+ %hello_l = call i64 @wcslen(i32* %hello_p)
+ ret i64 %hello_l
+}
diff --git a/llvm/test/Transforms/InstCombine/weak-symbols.ll b/llvm/test/Transforms/InstCombine/weak-symbols.ll
new file mode 100644
index 00000000000..3f92e64d2ec
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/weak-symbols.ll
@@ -0,0 +1,33 @@
+; PR4738 - Test that the library call simplifier doesn't assume anything about
+; weak symbols.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@real_init = weak_odr constant [2 x i8] c"y\00"
+@fake_init = weak constant [2 x i8] c"y\00"
+@.str = private constant [2 x i8] c"y\00"
+
+define i32 @foo() nounwind {
+; CHECK-LABEL: define i32 @foo(
+; CHECK: call i32 @strcmp
+; CHECK: ret i32 %temp1
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8], [2 x i8]* @fake_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8], [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+define i32 @bar() nounwind {
+; CHECK-LABEL: define i32 @bar(
+; CHECK: ret i32 0
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8], [2 x i8]* @real_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8], [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly
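+
+; The two weak globals differ only in linkage: @fake_init is plain 'weak', so
+; another definition may replace it at link time and its initializer cannot be
+; assumed, which keeps the strcmp call in @foo; @real_init is 'weak_odr', where
+; every definition must be identical, so the call in @bar folds to 0.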
diff --git a/llvm/test/Transforms/InstCombine/win-math.ll b/llvm/test/Transforms/InstCombine/win-math.ll
new file mode 100644
index 00000000000..38ed949e949
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/win-math.ll
@@ -0,0 +1,335 @@
+; RUN: opt < %s -O2 -S -mtriple=i386-pc-windows-msvc18 | FileCheck %s --check-prefixes=CHECK,MSVCXX,MSVC32
+; RUN: opt < %s -O2 -S -mtriple=i386-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,MSVC19,MSVC51
+; RUN: opt < %s -O2 -S -mtriple=x86_64-pc-windows-msvc17 | FileCheck %s --check-prefixes=CHECK,MSVCXX,MSVC64
+; RUN: opt < %s -O2 -S -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefixes=CHECK,MSVC19,MSVC83
+; RUN: opt < %s -O2 -S -mtriple=i386-pc-mingw32 | FileCheck %s --check-prefixes=CHECK,MINGW32
+; RUN: opt < %s -O2 -S -mtriple=x86_64-pc-mingw32 | FileCheck %s --check-prefixes=CHECK,MINGW64
+
+; x86 win32 msvcrt does not provide entry points for single-precision libm.
+; x86-64 win32 msvcrt does, but with some exceptions.
+; msvcrt does not provide all of C99 math, but mingw32 does.
+
+declare double @acos(double %x)
+define float @float_acos(float %x) nounwind readnone {
+; CHECK-LABEL: @float_acos(
+; MSVCXX-NOT: float @acosf
+; MSVCXX: double @acos
+; MSVC19-NOT: float @acosf
+; MSVC19: double @acos
+ %1 = fpext float %x to double
+ %2 = call double @acos(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @asin(double %x)
+define float @float_asin(float %x) nounwind readnone {
+; CHECK-LABEL: @float_asin(
+; MSVCXX-NOT: float @asinf
+; MSVCXX: double @asin
+; MSVC19-NOT: float @asinf
+; MSVC19: double @asin
+ %1 = fpext float %x to double
+ %2 = call double @asin(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @atan(double %x)
+define float @float_atan(float %x) nounwind readnone {
+; CHECK-LABEL: @float_atan(
+; MSVCXX-NOT: float @atanf
+; MSVCXX: double @atan
+; MSVC19-NOT: float @atanf
+; MSVC19: double @atan
+ %1 = fpext float %x to double
+ %2 = call double @atan(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @atan2(double %x, double %y)
+define float @float_atan2(float %x, float %y) nounwind readnone {
+; CHECK-LABEL: @float_atan2(
+; MSVCXX-NOT: float @atan2f
+; MSVCXX: double @atan2
+; MSVC19-NOT: float @atan2f
+; MSVC19: double @atan2
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @atan2(double %1, double %2)
+ %4 = fptrunc double %3 to float
+ ret float %4
+}
+
+declare double @ceil(double %x)
+define float @float_ceil(float %x) nounwind readnone {
+; CHECK-LABEL: @float_ceil(
+; MSVCXX-NOT: float @ceilf
+; MSVCXX: float @llvm.ceil.f32
+; MSVC19-NOT: double @ceil
+; MSVC19: float @llvm.ceil.f32
+; MINGW32-NOT: double @ceil
+; MINGW32: float @llvm.ceil.f32
+; MINGW64-NOT: double @ceil
+; MINGW64: float @llvm.ceil.f32
+ %1 = fpext float %x to double
+ %2 = call double @ceil(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @_copysign(double %x)
+define float @float_copysign(float %x) nounwind readnone {
+; CHECK-LABEL: @float_copysign(
+; MSVCXX-NOT: float @_copysignf
+; MSVCXX: double @_copysign
+; MSVC19-NOT: float @_copysignf
+; MSVC19: double @_copysign
+ %1 = fpext float %x to double
+ %2 = call double @_copysign(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @cos(double %x)
+define float @float_cos(float %x) nounwind readnone {
+; CHECK-LABEL: @float_cos(
+; MSVCXX-NOT: float @cosf
+; MSVCXX: double @cos
+; MSVC19-NOT: float @cosf
+; MSVC19: double @cos
+ %1 = fpext float %x to double
+ %2 = call double @cos(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @cosh(double %x)
+define float @float_cosh(float %x) nounwind readnone {
+; CHECK-LABEL: @float_cosh(
+; MSVCXX-NOT: float @coshf
+; MSVCXX: double @cosh
+; MSVC19-NOT: float @coshf
+; MSVC19: double @cosh
+ %1 = fpext float %x to double
+ %2 = call double @cosh(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @exp(double %x, double %y)
+define float @float_exp(float %x, float %y) nounwind readnone {
+; CHECK-LABEL: @float_exp(
+; MSVCXX-NOT: float @expf
+; MSVCXX: double @exp
+; MSVC19-NOT: float @expf
+; MSVC19: double @exp
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @exp(double %1, double %2)
+ %4 = fptrunc double %3 to float
+ ret float %4
+}
+
+declare double @fabs(double %x, double %y)
+define float @float_fabs(float %x, float %y) nounwind readnone {
+; CHECK-LABEL: @float_fabs(
+; MSVCXX-NOT: float @fabsf
+; MSVCXX: double @fabs
+; MSVC19-NOT: float @fabsf
+; MSVC19: double @fabs
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fabs(double %1, double %2)
+ %4 = fptrunc double %3 to float
+ ret float %4
+}
+
+declare double @floor(double %x)
+define float @float_floor(float %x) nounwind readnone {
+; CHECK-LABEL: @float_floor(
+; MSVCXX-NOT: float @floorf
+; MSVCXX: float @llvm.floor.f32
+; MSVC19-NOT: double @floor
+; MSVC19: float @llvm.floor.f32
+; MINGW32-NOT: double @floor
+; MINGW32: float @llvm.floor.f32
+; MINGW64-NOT: double @floor
+; MINGW64: float @llvm.floor.f32
+ %1 = fpext float %x to double
+ %2 = call double @floor(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @fmod(double %x, double %y)
+define float @float_fmod(float %x, float %y) nounwind readnone {
+; MSVCXX-LABEL: @float_fmod(
+; MSVCXX-NOT: float @fmodf
+; MSVCXX: double @fmod
+; MSVC19-NOT: float @fmodf
+; MSVC19: double @fmod
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @fmod(double %1, double %2)
+ %4 = fptrunc double %3 to float
+ ret float %4
+}
+
+declare double @log(double %x)
+define float @float_log(float %x) nounwind readnone {
+; CHECK-LABEL: @float_log(
+; MSVCXX-NOT: float @logf
+; MSVCXX: double @log
+; MSVC19-NOT: float @logf
+; MSVC19: double @log
+ %1 = fpext float %x to double
+ %2 = call double @log(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @logb(double %x)
+define float @float_logb(float %x) nounwind readnone {
+; CHECK-LABEL: @float_logb(
+; MSVCXX-NOT: float @logbf
+; MSVCXX: double @logb
+; MSVC19-NOT: float @logbf
+; MSVC19: double @logb
+ %1 = fpext float %x to double
+ %2 = call double @logb(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @pow(double %x, double %y)
+define float @float_pow(float %x, float %y) nounwind readnone {
+; CHECK-LABEL: @float_pow(
+; MSVCXX-NOT: float @powf
+; MSVCXX: double @pow
+; MSVC19-NOT: float @powf
+; MSVC19: double @pow
+ %1 = fpext float %x to double
+ %2 = fpext float %y to double
+ %3 = call double @pow(double %1, double %2)
+ %4 = fptrunc double %3 to float
+ ret float %4
+}
+
+declare double @sin(double %x)
+define float @float_sin(float %x) nounwind readnone {
+; CHECK-LABEL: @float_sin(
+; MSVCXX-NOT: float @sinf
+; MSVCXX: double @sin
+; MSVC19-NOT: float @sinf
+; MSVC19: double @sin
+ %1 = fpext float %x to double
+ %2 = call double @sin(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @sinh(double %x)
+define float @float_sinh(float %x) nounwind readnone {
+; CHECK-LABEL: @float_sinh(
+; MSVCXX-NOT: float @sinhf
+; MSVCXX: double @sinh
+; MSVC19-NOT: float @sinhf
+; MSVC19: double @sinh
+ %1 = fpext float %x to double
+ %2 = call double @sinh(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @sqrt(double %x)
+define float @float_sqrt(float %x) nounwind readnone {
+; CHECK-LABEL: @float_sqrt(
+; MSVC32-NOT: float @sqrtf
+; MSVC32: double @sqrt
+; MSVC51-NOT: float @sqrtf
+; MSVC51: double @sqrt
+; MSVC64-NOT: double @sqrt
+; MSVC64: float @sqrtf
+; MSVC83-NOT: double @sqrt
+; MSVC83: float @sqrtf
+; MINGW32-NOT: double @sqrt
+; MINGW32: float @sqrtf
+; MINGW64-NOT: double @sqrt
+; MINGW64: float @sqrtf
+ %1 = fpext float %x to double
+ %2 = call double @sqrt(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @tan(double %x)
+define float @float_tan(float %x) nounwind readnone {
+; CHECK-LABEL: @float_tan(
+; MSVCXX-NOT: float @tanf
+; MSVCXX: double @tan
+; MSVC19-NOT: float @tanf
+; MSVC19: double @tan
+ %1 = fpext float %x to double
+ %2 = call double @tan(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare double @tanh(double %x)
+define float @float_tanh(float %x) nounwind readnone {
+; CHECK-LABEL: @float_tanh(
+; MSVCXX-NOT: float @tanhf
+; MSVCXX: double @tanh
+; MSVC19-NOT: float @tanhf
+; MSVC19: double @tanh
+ %1 = fpext float %x to double
+ %2 = call double @tanh(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+; win32 does not have roundf; mingw32 does
+declare double @round(double %x)
+define float @float_round(float %x) nounwind readnone {
+; CHECK-LABEL: @float_round(
+; MSVCXX-NOT: double @roundf
+; MSVCXX: double @round
+; MSVC19-NOT: double @round
+; MSVC19: float @llvm.round.f32
+; MINGW32-NOT: double @round
+; MINGW32: float @llvm.round.f32
+; MINGW64-NOT: double @round
+; MINGW64: float @llvm.round.f32
+ %1 = fpext float %x to double
+ %2 = call double @round(double %1)
+ %3 = fptrunc double %2 to float
+ ret float %3
+}
+
+declare float @powf(float, float)
+
+; win32 lacks sqrtf & fabsf, win64 lacks fabsf, but
+; calls to the intrinsics can be emitted instead.
+define float @float_powsqrt(float %x) nounwind readnone {
+; CHECK-LABEL: @float_powsqrt(
+; MSVC32-NOT: float @sqrtf
+; MSVC32: float @powf
+; MSVC51-NOT: float @sqrtf
+; MSVC51: float @powf
+; MSVC64-NOT: float @powf
+; MSVC64: float @sqrtf
+; MSVC64: float @llvm.fabs.f32(
+; MSVC83-NOT: float @powf
+; MSVC83: float @sqrtf
+; MSVC83: float @llvm.fabs.f32(
+; MINGW32-NOT: float @powf
+; MINGW32: float @sqrtf
+; MINGW32: float @llvm.fabs.f32
+; MINGW64-NOT: float @powf
+; MINGW64: float @sqrtf
+; MINGW64: float @llvm.fabs.f32(
+ %1 = call float @powf(float %x, float 0.5)
+ ret float %1
+}
diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll
new file mode 100644
index 00000000000..e80da2afe2f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/with_overflow.ll
@@ -0,0 +1,606 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare { i8, i1 } @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.ssub.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare { i8, i1 } @llvm.smul.with.overflow.i8(i8, i8) nounwind readnone
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+define i8 @uaddtest1(i8 %A, i8 %B) {
+; CHECK-LABEL: @uaddtest1(
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i8 [[Y]]
+;
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
+ %y = extractvalue { i8, i1 } %x, 0
+ ret i8 %y
+}
+
+define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) {
+; CHECK-LABEL: @uaddtest2(
+; CHECK-NEXT: [[AND_A:%.*]] = and i8 [[A:%.*]], 127
+; CHECK-NEXT: [[AND_B:%.*]] = and i8 [[B:%.*]], 127
+; CHECK-NEXT: [[X:%.*]] = add nuw i8 [[AND_A]], [[AND_B]]
+; CHECK-NEXT: store i1 false, i1* [[OVERFLOWPTR:%.*]], align 1
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %and.A = and i8 %A, 127
+ %and.B = and i8 %B, 127
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %and.A, i8 %and.B)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) {
+; CHECK-LABEL: @uaddtest3(
+; CHECK-NEXT: [[OR_A:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT: [[OR_B:%.*]] = or i8 [[B:%.*]], -128
+; CHECK-NEXT: [[X:%.*]] = add i8 [[OR_A]], [[OR_B]]
+; CHECK-NEXT: store i1 true, i1* [[OVERFLOWPTR:%.*]], align 1
+; CHECK-NEXT: ret i8 [[X]]
+;
+ %or.A = or i8 %A, -128
+ %or.B = or i8 %B, -128
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %or.A, i8 %or.B)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i8 @uaddtest4(i8 %A, i1* %overflowPtr) {
+; CHECK-LABEL: @uaddtest4(
+; CHECK-NEXT: ret i8 undef
+;
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 %A)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i8 @uaddtest5(i8 %A, i1* %overflowPtr) {
+; CHECK-LABEL: @uaddtest5(
+; CHECK-NEXT: store i1 false, i1* [[OVERFLOWPTR:%.*]], align 1
+; CHECK-NEXT: ret i8 [[A:%.*]]
+;
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 %A)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i1 @uaddtest6(i8 %A, i8 %B) {
+; CHECK-LABEL: @uaddtest6(
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt i8 [[A:%.*]], 3
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %A, i8 -4)
+ %z = extractvalue { i8, i1 } %x, 1
+ ret i1 %z
+}
+
+define i8 @uaddtest7(i8 %A, i8 %B) {
+; CHECK-LABEL: @uaddtest7(
+; CHECK-NEXT: [[Z:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %x = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
+ %z = extractvalue { i8, i1 } %x, 0
+ ret i8 %z
+}
+
+; PR20194
+define { i32, i1 } @saddtest_nsw(i8 %a, i8 %b) {
+; CHECK-LABEL: @saddtest_nsw(
+; CHECK-NEXT: [[AA:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[BB:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[X:%.*]] = add nsw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = sext i8 %a to i32
+ %bb = sext i8 %b to i32
+ %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @uaddtest_nuw(i32 %a, i32 %b) {
+; CHECK-LABEL: @uaddtest_nuw(
+; CHECK-NEXT: [[AA:%.*]] = and i32 [[A:%.*]], 2147483647
+; CHECK-NEXT: [[BB:%.*]] = and i32 [[B:%.*]], 2147483647
+; CHECK-NEXT: [[X:%.*]] = add nuw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = and i32 %a, 2147483647
+ %bb = and i32 %b, 2147483647
+ %x = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @ssubtest_nsw(i8 %a, i8 %b) {
+; CHECK-LABEL: @ssubtest_nsw(
+; CHECK-NEXT: [[AA:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[BB:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[X:%.*]] = sub nsw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = sext i8 %a to i32
+ %bb = sext i8 %b to i32
+ %x = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @usubtest_nuw(i32 %a, i32 %b) {
+; CHECK-LABEL: @usubtest_nuw(
+; CHECK-NEXT: [[AA:%.*]] = or i32 [[A:%.*]], -2147483648
+; CHECK-NEXT: [[BB:%.*]] = and i32 [[B:%.*]], 2147483647
+; CHECK-NEXT: [[X:%.*]] = sub nuw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = or i32 %a, 2147483648
+ %bb = and i32 %b, 2147483647
+ %x = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @smultest1_nsw(i32 %a, i32 %b) {
+; CHECK-LABEL: @smultest1_nsw(
+; CHECK-NEXT: [[AA:%.*]] = and i32 [[A:%.*]], 4095
+; CHECK-NEXT: [[BB:%.*]] = and i32 [[B:%.*]], 524287
+; CHECK-NEXT: [[X:%.*]] = mul nuw nsw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = and i32 %a, 4095 ; 0xfff
+ %bb = and i32 %b, 524287; 0x7ffff
+ %x = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @smultest2_nsw(i32 %a, i32 %b) {
+; CHECK-LABEL: @smultest2_nsw(
+; CHECK-NEXT: [[AA:%.*]] = ashr i32 [[A:%.*]], 16
+; CHECK-NEXT: [[BB:%.*]] = ashr i32 [[B:%.*]], 16
+; CHECK-NEXT: [[X:%.*]] = mul nsw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = ashr i32 %a, 16
+ %bb = ashr i32 %b, 16
+ %x = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @smultest3_sw(i32 %a, i32 %b) {
+; CHECK-LABEL: @smultest3_sw(
+; CHECK-NEXT: [[AA:%.*]] = ashr i32 [[A:%.*]], 16
+; CHECK-NEXT: [[BB:%.*]] = ashr i32 [[B:%.*]], 15
+; CHECK-NEXT: [[X:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[AA]], i32 [[BB]])
+; CHECK-NEXT: ret { i32, i1 } [[X]]
+;
+ %aa = ashr i32 %a, 16
+ %bb = ashr i32 %b, 15
+ %x = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
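+
+; A rough rule explains the two results above: signed multiplication cannot
+; overflow when the operands' known sign bits sum to more than the bit width
+; plus one (the extra bit covers the INT_MIN * INT_MIN corner). After 'ashr 16'
+; each operand has 17 sign bits, and 17 + 17 = 34 > 33, so smultest2 folds to
+; 'mul nsw'; with 'ashr 16' and 'ashr 15' the sum is only 33, so smultest3
+; keeps the intrinsic.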
+
+define { i32, i1 } @umultest_nuw(i32 %a, i32 %b) {
+; CHECK-LABEL: @umultest_nuw(
+; CHECK-NEXT: [[AA:%.*]] = and i32 [[A:%.*]], 65535
+; CHECK-NEXT: [[BB:%.*]] = and i32 [[B:%.*]], 65535
+; CHECK-NEXT: [[X:%.*]] = mul nuw i32 [[AA]], [[BB]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = and i32 %a, 65535 ; 0xffff
+ %bb = and i32 %b, 65535 ; 0xffff
+ %x = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %aa, i32 %bb)
+ ret { i32, i1 } %x
+}
+
+define i8 @umultest1(i8 %A, i1* %overflowPtr) {
+; CHECK-LABEL: @umultest1(
+; CHECK-NEXT: store i1 false, i1* [[OVERFLOWPTR:%.*]], align 1
+; CHECK-NEXT: ret i8 0
+;
+ %x = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 0, i8 %A)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i8 @umultest2(i8 %A, i1* %overflowPtr) {
+; CHECK-LABEL: @umultest2(
+; CHECK-NEXT: store i1 false, i1* [[OVERFLOWPTR:%.*]], align 1
+; CHECK-NEXT: ret i8 [[A:%.*]]
+;
+ %x = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 %A)
+ %y = extractvalue { i8, i1 } %x, 0
+ %z = extractvalue { i8, i1 } %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+}
+
+define i32 @umultest3(i32 %n) nounwind {
+; CHECK-LABEL: @umultest3(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[N:%.*]], 2
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[SHR]], 3
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+ %shr = lshr i32 %n, 2
+ %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
+ %ov = extractvalue { i32, i1 } %mul, 1
+ %res = extractvalue { i32, i1 } %mul, 0
+ %ret = select i1 %ov, i32 -1, i32 %res
+ ret i32 %ret
+}
+
+define i32 @umultest4(i32 %n) nounwind {
+; CHECK-LABEL: @umultest4(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[N:%.*]], 1
+; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[SHR]], i32 4)
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
+; CHECK-NEXT: [[RES:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
+; CHECK-NEXT: [[RET:%.*]] = select i1 [[OV]], i32 -1, i32 [[RES]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %shr = lshr i32 %n, 1
+ %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %shr, i32 4)
+ %ov = extractvalue { i32, i1 } %mul, 1
+ %res = extractvalue { i32, i1 } %mul, 0
+ %ret = select i1 %ov, i32 -1, i32 %res
+ ret i32 %ret
+}
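+
+; Simple range arithmetic explains the difference: in umultest3, n >> 2 is at
+; most 2^30 - 1 and three times that still fits in 32 bits, so the multiply
+; becomes 'mul nuw' and the overflow select disappears; in umultest4, n >> 1
+; can approach 2^31 and four times that may not fit, so the intrinsic remains.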
+
+define { i32, i1 } @umultest5(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @umultest5(
+; CHECK-NEXT: [[OR_X:%.*]] = or i32 [[X:%.*]], -2147483648
+; CHECK-NEXT: [[OR_Y:%.*]] = or i32 [[Y:%.*]], -2147483648
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[OR_X]], [[OR_Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 true }, i32 [[MUL]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %or_x = or i32 %x, 2147483648
+ %or_y = or i32 %y, 2147483648
+ %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %or_x, i32 %or_y)
+ ret { i32, i1 } %mul
+}
+
+define i1 @overflow_div_add(i32 %v1, i32 %v2) nounwind {
+; CHECK-LABEL: @overflow_div_add(
+; CHECK-NEXT: ret i1 false
+;
+ %div = sdiv i32 %v1, 2
+ %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %div, i32 1)
+ %obit = extractvalue { i32, i1 } %t, 1
+ ret i1 %obit
+}
+
+define i1 @overflow_div_sub(i32 %v1, i32 %v2) nounwind {
+ ; Check cases where the number of known sign bits is larger than the word size.
+; CHECK-LABEL: @overflow_div_sub(
+; CHECK-NEXT: ret i1 false
+;
+ %a = ashr i32 %v1, 18
+ %div = sdiv i32 %a, 65536
+ %t = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %div, i32 1)
+ %obit = extractvalue { i32, i1 } %t, 1
+ ret i1 %obit
+}
+
+define i1 @overflow_mod_mul(i32 %v1, i32 %v2) nounwind {
+; CHECK-LABEL: @overflow_mod_mul(
+; CHECK-NEXT: ret i1 false
+;
+ %rem = srem i32 %v1, 1000
+ %t = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %rem, i32 %rem)
+ %obit = extractvalue { i32, i1 } %t, 1
+ ret i1 %obit
+}
+
+define i1 @overflow_mod_overflow_mul(i32 %v1, i32 %v2) nounwind {
+; CHECK-LABEL: @overflow_mod_overflow_mul(
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[V1:%.*]], 65537
+; CHECK-NEXT: [[T:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[REM]], i32 [[REM]])
+; CHECK-NEXT: [[OBIT:%.*]] = extractvalue { i32, i1 } [[T]], 1
+; CHECK-NEXT: ret i1 [[OBIT]]
+;
+ %rem = srem i32 %v1, 65537
+  ; This may overflow because the mul operands may each need more than 16 bits,
+  ; so the 32-bit result may not be wide enough to hold the product.
+ %t = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %rem, i32 %rem)
+ %obit = extractvalue { i32, i1 } %t, 1
+ ret i1 %obit
+}
+
+define { i32, i1 } @ssubtest_reorder(i8 %a) {
+; CHECK-LABEL: @ssubtest_reorder(
+; CHECK-NEXT: [[AA:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[X:%.*]] = sub nsw i32 0, [[AA]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[X]], 0
+; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
+;
+ %aa = sext i8 %a to i32
+ %x = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 0, i32 %aa)
+ ret { i32, i1 } %x
+}
+
+define { i32, i1 } @never_overflows_ssub_test0(i32 %a) {
+; CHECK-LABEL: @never_overflows_ssub_test0(
+; CHECK-NEXT: [[X:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[A:%.*]], 0
+; CHECK-NEXT: ret { i32, i1 } [[X]]
+;
+ %x = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 0)
+ ret { i32, i1 } %x
+}
+
+define i1 @uadd_res_ult_x(i32 %x, i32 %y, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ult_x(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[X]]
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ult i32 %c, %x
+ ret i1 %d
+}
+
+define i1 @uadd_res_ult_y(i32 %x, i32 %y, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ult_y(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[Y]]
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ult i32 %c, %y
+ ret i1 %d
+}
+
+define i1 @uadd_res_ugt_x(i32 %xx, i32 %y, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ugt_x(
+; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XX:%.*]]
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[X]], [[C]]
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %x = urem i32 42, %xx ; Thwart complexity-based canonicalization
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ugt i32 %x, %c
+ ret i1 %d
+}
+
+define i1 @uadd_res_ugt_y(i32 %x, i32 %yy, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ugt_y(
+; CHECK-NEXT: [[Y:%.*]] = urem i32 42, [[YY:%.*]]
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[Y]], [[C]]
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %y = urem i32 42, %yy ; Thwart complexity-based canonicalization
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ugt i32 %y, %c
+ ret i1 %d
+}
+
+define i1 @uadd_res_ult_const(i32 %x, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ult_const(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], 42
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 42)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ult i32 %c, 42
+ ret i1 %d
+}
+
+define i1 @uadd_res_ult_const_one(i32 %x, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ult_const_one(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ult i32 %c, 1
+ ret i1 %d
+}
+
+define i1 @uadd_res_ult_const_minus_one(i32 %x, i1* %p) nounwind {
+; CHECK-LABEL: @uadd_res_ult_const_minus_one(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 -1)
+; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1
+; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1
+; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0
+; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], -1
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 -1)
+ %b = extractvalue { i32, i1 } %a, 1
+ store i1 %b, i1* %p
+ %c = extractvalue { i32, i1 } %a, 0
+ %d = icmp ult i32 %c, -1
+ ret i1 %d
+}
+
+define { i32, i1 } @sadd_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @sadd_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+define { i32, i1 } @uadd_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @uadd_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+define { i32, i1 } @ssub_no_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @ssub_no_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 42, i32 [[X:%.*]])
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+define { i32, i1 } @usub_no_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @usub_no_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 42, i32 [[X:%.*]])
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+define { i32, i1 } @smul_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @smul_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+define { i32, i1 } @umul_canonicalize_constant_arg0(i32 %x) nounwind {
+; CHECK-LABEL: @umul_canonicalize_constant_arg0(
+; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT: ret { i32, i1 } [[A]]
+;
+ %a = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 42, i32 %x)
+ ret { i32, i1 } %a
+}
+
+; Always overflow tests
+
+define { i8, i1 } @uadd_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @uadd_always_overflow(
+; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], -64
+; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[Y]], 64
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %y = or i8 %x, 192
+ %a = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %y, i8 64)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @usub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @usub_always_overflow(
+; CHECK-NEXT: [[Y:%.*]] = or i8 [[X:%.*]], 64
+; CHECK-NEXT: [[A:%.*]] = sub nsw i8 63, [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %y = or i8 %x, 64
+ %a = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 63, i8 %y)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @umul_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @umul_always_overflow(
+; CHECK-NEXT: [[A:%.*]] = shl i8 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %y = or i8 %x, 128
+ %a = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %y, i8 2)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @sadd_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @sadd_always_overflow(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100
+; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100
+; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[Y]], 28
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %c = icmp sgt i8 %x, 100
+ %y = select i1 %c, i8 %x, i8 100
+ %a = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %y, i8 28)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @ssub_always_overflow(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 29
+; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 29
+; CHECK-NEXT: [[A:%.*]] = sub nuw i8 -100, [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
+; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
+;
+ %c = icmp sgt i8 %x, 29
+ %y = select i1 %c, i8 %x, i8 29
+ %a = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 -100, i8 %y)
+ ret { i8, i1 } %a
+}
+
+define { i8, i1 } @smul_always_overflow(i8 %x) nounwind {
+; CHECK-LABEL: @smul_always_overflow(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100
+; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100
+; CHECK-NEXT: [[A:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[Y]], i8 2)
+; CHECK-NEXT: ret { i8, i1 } [[A]]
+;
+ %c = icmp sgt i8 %x, 100
+ %y = select i1 %c, i8 %x, i8 100
+ %a = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %y, i8 2)
+ ret { i8, i1 } %a
+}
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
new file mode 100644
index 00000000000..7f0a8d8cd2a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @eq_zero(i4 %x, i4 %y) {
+; CHECK-LABEL: @eq_zero(
+; CHECK-NEXT: [[I0:%.*]] = icmp eq i4 [[X:%.*]], 0
+; CHECK-NEXT: [[I1:%.*]] = icmp eq i4 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %i0 = icmp eq i4 %x, 0
+ %i1 = icmp eq i4 %y, 0
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define i1 @ne_zero(i4 %x, i4 %y) {
+; CHECK-LABEL: @ne_zero(
+; CHECK-NEXT: [[I0:%.*]] = icmp ne i4 [[X:%.*]], 0
+; CHECK-NEXT: [[I1:%.*]] = icmp ne i4 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %i0 = icmp ne i4 %x, 0
+ %i1 = icmp ne i4 %y, 0
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define i1 @eq_ne_zero(i4 %x, i4 %y) {
+; CHECK-LABEL: @eq_ne_zero(
+; CHECK-NEXT: [[I0:%.*]] = icmp eq i4 [[X:%.*]], 0
+; CHECK-NEXT: [[I1:%.*]] = icmp ne i4 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %i0 = icmp eq i4 %x, 0
+ %i1 = icmp ne i4 %y, 0
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define i1 @slt_zero(i4 %x, i4 %y) {
+; CHECK-LABEL: @slt_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i4 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %i0 = icmp slt i4 %x, 0
+ %i1 = icmp slt i4 %y, 0
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+; Don't increase the instruction count.
+
+declare void @use(i1)
+
+define i1 @slt_zero_extra_uses(i4 %x, i4 %y) {
+; CHECK-LABEL: @slt_zero_extra_uses(
+; CHECK-NEXT: [[I0:%.*]] = icmp slt i4 [[X:%.*]], 0
+; CHECK-NEXT: [[I1:%.*]] = icmp slt i4 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT: call void @use(i1 [[I0]])
+; CHECK-NEXT: call void @use(i1 [[I1]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %i0 = icmp slt i4 %x, 0
+ %i1 = icmp slt i4 %y, 0
+ %r = xor i1 %i0, %i1
+ call void @use(i1 %i0)
+ call void @use(i1 %i1)
+ ret i1 %r
+}
+
+define i1 @sgt_zero(i4 %x, i4 %y) {
+; CHECK-LABEL: @sgt_zero(
+; CHECK-NEXT: [[I0:%.*]] = icmp sgt i4 [[X:%.*]], 0
+; CHECK-NEXT: [[I1:%.*]] = icmp sgt i4 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[I0]], [[I1]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %i0 = icmp sgt i4 %x, 0
+ %i1 = icmp sgt i4 %y, 0
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define i1 @sgt_minus1(i4 %x, i4 %y) {
+; CHECK-LABEL: @sgt_minus1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i4 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %i0 = icmp sgt i4 %x, -1
+ %i1 = icmp sgt i4 %y, -1
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define i1 @slt_zero_sgt_minus1(i4 %x, i4 %y) {
+; CHECK-LABEL: @slt_zero_sgt_minus1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i4 [[TMP1]], -1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %i0 = icmp slt i4 %x, 0
+ %i1 = icmp sgt i4 %y, -1
+ %r = xor i1 %i0, %i1
+ ret i1 %r
+}
+
+define <2 x i1> @sgt_minus1_slt_zero_sgt(<2 x i4> %x, <2 x i4> %y) {
+; CHECK-LABEL: @sgt_minus1_slt_zero_sgt(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i4> [[TMP1]], <i4 -1, i4 -1>
+; CHECK-NEXT: ret <2 x i1> [[TMP2]]
+;
+ %i1 = icmp sgt <2 x i4> %x, <i4 -1, i4 -1>
+ %i0 = icmp slt <2 x i4> %y, zeroinitializer
+ %r = xor <2 x i1> %i0, %i1
+ ret <2 x i1> %r
+}
+
+; Don't try (crash) if the operand types don't match.
+
+define i1 @different_type_cmp_ops(i32 %x, i64 %y) {
+; CHECK-LABEL: @different_type_cmp_ops(
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i64 [[Y:%.*]], 0
+; CHECK-NEXT: [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %cmp1 = icmp slt i32 %x, 0
+ %cmp2 = icmp slt i64 %y, 0
+ %r = xor i1 %cmp1, %cmp2
+ ret i1 %r
+}
+
+define i1 @test13(i8 %A, i8 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C = icmp ult i8 %A, %B
+ %D = icmp ugt i8 %A, %B
+ %E = xor i1 %C, %D
+ ret i1 %E
+}
+
+define i1 @test14(i8 %A, i8 %B) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret i1 true
+;
+ %C = icmp eq i8 %A, %B
+ %D = icmp ne i8 %B, %A
+ %E = xor i1 %C, %D
+ ret i1 %E
+}
+
+define i1 @xor_icmp_ptr(i8* %c, i8* %d) {
+; CHECK-LABEL: @xor_icmp_ptr(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8* [[C:%.*]], null
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8* [[D:%.*]], null
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %cmp = icmp slt i8* %c, null
+ %cmp1 = icmp slt i8* %d, null
+ %xor = xor i1 %cmp, %cmp1
+ ret i1 %xor
+}
+
diff --git a/llvm/test/Transforms/InstCombine/xor-undef.ll b/llvm/test/Transforms/InstCombine/xor-undef.ll
new file mode 100644
index 00000000000..cf72955b66b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/xor-undef.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -instcombine -S | grep zeroinitializer
+
+define <2 x i64> @f() {
+ %tmp = xor <2 x i64> undef, undef
+ ret <2 x i64> %tmp
+}
diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll
new file mode 100644
index 00000000000..b06abe2919b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/xor.ll
@@ -0,0 +1,914 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@G1 = global i32 0
+@G2 = global i32 0
+
+define i1 @test0(i1 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: ret i1 [[A:%.*]]
+;
+ %B = xor i1 %A, false
+ ret i1 %B
+}
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 [[A:%.*]]
+;
+ %B = xor i32 %A, 0
+ ret i32 %B
+}
+
+define i1 @test2(i1 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 false
+;
+ %B = xor i1 %A, %A
+ ret i1 %B
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 0
+;
+ %B = xor i32 %A, %A
+ ret i32 %B
+}
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i32 -1
+;
+ %NotA = xor i32 -1, %A
+ %B = xor i32 %A, %NotA
+ ret i32 %B
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], -124
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = or i32 %A, 123
+ %r = xor i32 %t1, 123
+ ret i32 %r
+}
+
+define i8 @test6(i8 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i8 [[A:%.*]]
+;
+ %B = xor i8 %A, 17
+ %C = xor i8 %B, 17
+ ret i8 %C
+}
+
+define i32 @test7(i32 %A, i32 %B) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[A1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT: [[C11:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[C11]]
+;
+ %A1 = and i32 %A, 7
+ %B1 = and i32 %B, 128
+ %C1 = xor i32 %A1, %B1
+ ret i32 %C1
+}
+
+define i8 @test8(i1 %c) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: br i1 [[C:%.*]], label [[FALSE:%.*]], label [[TRUE:%.*]]
+; CHECK: True:
+; CHECK-NEXT: ret i8 1
+; CHECK: False:
+; CHECK-NEXT: ret i8 3
+;
+ %d = xor i1 %c, true
+ br i1 %d, label %True, label %False
+
+True:
+ ret i8 1
+
+False:
+ ret i8 3
+}
+
+define i1 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A:%.*]], 89
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i8 %A, 123
+ %C = icmp eq i8 %B, 34
+ ret i1 %C
+}
+
+define <2 x i1> @test9vec(<2 x i8> %a) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 89, i8 89>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = xor <2 x i8> %a, <i8 123, i8 123>
+ %c = icmp eq <2 x i8> %b, <i8 34, i8 34>
+ ret <2 x i1> %c
+}
+
+define i8 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], 3
+; CHECK-NEXT: [[C1:%.*]] = or i8 [[B]], 4
+; CHECK-NEXT: ret i8 [[C1]]
+;
+ %B = and i8 %A, 3
+ %C = xor i8 %B, 4
+ ret i8 %C
+}
+
+define i8 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B:%.*]] = and i8 [[A:%.*]], -13
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[B]], 8
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %B = or i8 %A, 12
+ %C = xor i8 %B, 4
+ ret i8 %C
+}
+
+define i1 @test12(i8 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A:%.*]], 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i8 %A, 4
+ %c = icmp ne i8 %B, 0
+ ret i1 %c
+}
+
+define <2 x i1> @test12vec(<2 x i8> %a) {
+; CHECK-LABEL: @test12vec(
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 4, i8 4>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %b = xor <2 x i8> %a, <i8 4, i8 4>
+ %c = icmp ne <2 x i8> %b, zeroinitializer
+ ret <2 x i1> %c
+}
+
+define i32 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A:%.*]], 124
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = xor i32 %A, -1
+ %C = sub i32 123, %B
+ ret i32 %C
+}
+
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: ret i32 [[B:%.*]]
+;
+ %C = xor i32 %A, %B
+ %D = xor i32 %C, %A
+ ret i32 %D
+}
+
+define void @test20(i32 %A, i32 %B) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: store i32 [[B:%.*]], i32* @G1, align 4
+; CHECK-NEXT: store i32 [[A:%.*]], i32* @G2, align 4
+; CHECK-NEXT: ret void
+;
+ %t2 = xor i32 %B, %A
+ %t5 = xor i32 %t2, %B
+ %t8 = xor i32 %t5, %t2
+ store i32 %t8, i32* @G1
+ store i32 %t5, i32* @G2
+ ret void
+}
+
+define i32 @test22(i1 %X) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[X:%.*]] to i32
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = xor i1 %X, true
+ %Z = zext i1 %Y to i32
+ %Q = xor i32 %Z, 1
+ ret i32 %Q
+}
+
+; Look through a zext between xors.
+
+define i32 @fold_zext_xor_sandwich(i1 %X) {
+; CHECK-LABEL: @fold_zext_xor_sandwich(
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[X:%.*]] to i32
+; CHECK-NEXT: [[Q:%.*]] = xor i32 [[Z]], 3
+; CHECK-NEXT: ret i32 [[Q]]
+;
+ %Y = xor i1 %X, true
+ %Z = zext i1 %Y to i32
+ %Q = xor i32 %Z, 2
+ ret i32 %Q
+}
+
+define <2 x i32> @fold_zext_xor_sandwich_vec(<2 x i1> %X) {
+; CHECK-LABEL: @fold_zext_xor_sandwich_vec(
+; CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ %Y = xor <2 x i1> %X, <i1 true, i1 true>
+ %Z = zext <2 x i1> %Y to <2 x i32>
+ %Q = xor <2 x i32> %Z, <i32 2, i32 2>
+ ret <2 x i32> %Q
+}
+
+define i1 @test23(i32 %a, i32 %b) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = xor i32 %b, %a
+ %t4 = icmp eq i32 %t2, %a
+ ret i1 %t4
+}
+
+define i1 @test24(i32 %c, i32 %d) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 [[D:%.*]], 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = xor i32 %d, %c
+ %t4 = icmp ne i32 %t2, %c
+ ret i1 %t4
+}
+
+define i32 @test25(i32 %g, i32 %h) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[T4:%.*]] = and i32 [[H:%.*]], [[G:%.*]]
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %h2 = xor i32 %h, -1
+ %t2 = and i32 %h2, %g
+ %t4 = xor i32 %t2, %g
+ ret i32 %t4
+}
+
+define i32 @test27(i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[T7:%.*]] = zext i1 [[T6]] to i32
+; CHECK-NEXT: ret i32 [[T7]]
+;
+ %t2 = xor i32 %d, %b
+ %t5 = xor i32 %d, %c
+ %t6 = icmp eq i32 %t2, %t5
+ %t7 = zext i1 %t6 to i32
+ ret i32 %t7
+}
+
+define i32 @test28(i32 %indvar) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[T214:%.*]] = add i32 [[INDVAR:%.*]], 1
+; CHECK-NEXT: ret i32 [[T214]]
+;
+ %t7 = add i32 %indvar, -2147483647
+ %t214 = xor i32 %t7, -2147483648
+ ret i32 %t214
+}
+
+define <2 x i32> @test28vec(<2 x i32> %indvar) {
+; CHECK-LABEL: @test28vec(
+; CHECK-NEXT: [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[T214]]
+;
+ %t7 = add <2 x i32> %indvar, <i32 -2147483647, i32 -2147483647>
+ %t214 = xor <2 x i32> %t7, <i32 -2147483648, i32 -2147483648>
+ ret <2 x i32> %t214
+}
+
+define i32 @test28_sub(i32 %indvar) {
+; CHECK-LABEL: @test28_sub(
+; CHECK-NEXT: [[T214:%.*]] = sub i32 1, [[INDVAR:%.*]]
+; CHECK-NEXT: ret i32 [[T214]]
+;
+ %t7 = sub i32 -2147483647, %indvar
+ %t214 = xor i32 %t7, -2147483648
+ ret i32 %t214
+}
+
+define <2 x i32> @test28_subvec(<2 x i32> %indvar) {
+; CHECK-LABEL: @test28_subvec(
+; CHECK-NEXT: [[T214:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[INDVAR:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[T214]]
+;
+ %t7 = sub <2 x i32> <i32 -2147483647, i32 -2147483647>, %indvar
+ %t214 = xor <2 x i32> %t7, <i32 -2147483648, i32 -2147483648>
+ ret <2 x i32> %t214
+}
+
+define i32 @test29(i1 %C) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], i32 915, i32 113
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i32 1000, i32 10
+ %V = xor i32 %A, 123
+ ret i32 %V
+}
+
+define <2 x i32> @test29vec(i1 %C) {
+; CHECK-LABEL: @test29vec(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 915, i32 915>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+ %V = xor <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %V
+}
+
+define <2 x i32> @test29vec2(i1 %C) {
+; CHECK-LABEL: @test29vec2(
+; CHECK-NEXT: [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 915, i32 2185>, <2 x i32> <i32 113, i32 339>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+ %V = xor <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %V
+}
+
+define i32 @test30(i1 %which) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 915, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT: ret i32 [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+ %value = xor i32 %A, 123
+ ret i32 %value
+}
+
+define <2 x i32> @test30vec(i1 %which) {
+; CHECK-LABEL: @test30vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 915, i32 915>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+ %value = xor <2 x i32> %A, <i32 123, i32 123>
+ ret <2 x i32> %value
+}
+
+define <2 x i32> @test30vec2(i1 %which) {
+; CHECK-LABEL: @test30vec2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK: delay:
+; CHECK-NEXT: br label [[FINAL]]
+; CHECK: final:
+; CHECK-NEXT: [[A:%.*]] = phi <2 x i32> [ <i32 915, i32 2185>, [[ENTRY:%.*]] ], [ <i32 113, i32 339>, [[DELAY]] ]
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+entry:
+ br i1 %which, label %final, label %delay
+
+delay:
+ br label %final
+
+final:
+ %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+ %value = xor <2 x i32> %A, <i32 123, i32 333>
+ ret <2 x i32> %value
+}
+
+; B ^ (B | A) --> A & ~B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @or_xor_commute1(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @or_xor_commute1(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = or i32 %b, %a
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+; B ^ (B | A) --> A & ~B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @or_xor_commute2(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @or_xor_commute2(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = or i32 %a, %b
+ %r = xor i32 %o, %b
+ ret i32 %r
+}
+
+; B ^ (B | A) --> A & ~B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @or_xor_commute3(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @or_xor_commute3(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = or i32 %b, %a
+ %r = xor i32 %o, %b
+ ret i32 %r
+}
+
+; B ^ (B | A) --> A & ~B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @or_xor_commute4(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @or_xor_commute4(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = or i32 %a, %b
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+define i32 @or_xor_extra_use(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @or_xor_extra_use(
+; CHECK-NEXT: [[O:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: store i32 [[O]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[O]], [[B]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %o = or i32 %a, %b
+ store i32 %o, i32* %p
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+; B ^ (B & A) --> ~A & B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @and_xor_commute1(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @and_xor_commute1(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = and i32 %b, %a
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+; B ^ (B & A) --> ~A & B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @and_xor_commute2(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @and_xor_commute2(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = and i32 %a, %b
+ %r = xor i32 %o, %b
+ ret i32 %r
+}
+
+; B ^ (B & A) --> ~A & B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @and_xor_commute3(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @and_xor_commute3(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = and i32 %b, %a
+ %r = xor i32 %o, %b
+ ret i32 %r
+}
+
+; B ^ (B & A) --> ~A & B
+; The division ops are here to thwart complexity-based canonicalization: all ops are binops.
+
+define i32 @and_xor_commute4(i32 %p1, i32 %p2) {
+; CHECK-LABEL: @and_xor_commute4(
+; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]]
+; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = udiv i32 42, %p1
+ %b = udiv i32 42, %p2
+ %o = and i32 %a, %b
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+define i32 @and_xor_extra_use(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @and_xor_extra_use(
+; CHECK-NEXT: [[O:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: store i32 [[O]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[O]], [[B]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %o = and i32 %a, %b
+ store i32 %o, i32* %p
+ %r = xor i32 %b, %o
+ ret i32 %r
+}
+
+; (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
+; The extra use (store) is here because the simpler case
+; may be transformed using demanded bits.
+
+define i8 @xor_or_not(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_or_not(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: store i8 [[NX]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], -8
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP1]], -13
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ store i8 %nx, i8* %p
+ %or = or i8 %nx, 7
+ %r = xor i8 %or, 12
+ ret i8 %r
+}
+
+; Don't do this if the 'or' has extra uses.
+
+define i8 @xor_or_not_uses(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_or_not_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], 7
+; CHECK-NEXT: [[OR:%.*]] = xor i8 [[TMP1]], -8
+; CHECK-NEXT: store i8 [[OR]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP1]], -12
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ %or = or i8 %nx, 7
+ store i8 %or, i8* %p
+ %r = xor i8 %or, 12
+ ret i8 %r
+}
+
+; (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
+; The extra use (store) is here because the simpler case
+; may be transformed using demanded bits.
+
+define i8 @xor_and_not(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_and_not(
+; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1
+; CHECK-NEXT: store i8 [[NX]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], -43
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP1]], -32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ store i8 %nx, i8* %p
+ %and = and i8 %nx, 42
+ %r = xor i8 %and, 31
+ ret i8 %r
+}
+
+; Don't do this if the 'and' has extra uses.
+
+define i8 @xor_and_not_uses(i8 %x, i8* %p) {
+; CHECK-LABEL: @xor_and_not_uses(
+; CHECK-NEXT: [[NX:%.*]] = and i8 [[X:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = xor i8 [[NX]], 42
+; CHECK-NEXT: store i8 [[AND]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[NX]], 53
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %nx = xor i8 %x, -1
+ %and = and i8 %nx, 42
+ store i8 %and, i8* %p
+ %r = xor i8 %and, 31
+ ret i8 %r
+}
+
+; The tests 39-47 are related to the canonicalization:
+; %notx = xor i32 %x, -1
+; %cmp = icmp sgt i32 %notx, %y
+; %smax = select i1 %cmp, i32 %notx, i32 %y
+; %res = xor i32 %smax, -1
+; =>
+; %noty = xor i32 %y, -1
+; %cmp2 = icmp slt i32 %x, %noty
+; %res = select i1 %cmp2, i32 %x, i32 %noty
+;
+; The same transformation is valid for smin/umax/umin.
+
+define i32 @test39(i32 %x) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %1 = xor i32 %x, -1
+ %2 = icmp sgt i32 %1, -256
+ %3 = select i1 %2, i32 %1, i32 -256
+ %res = xor i32 %3, -1
+ ret i32 %res
+}
+
+define i32 @test40(i32 %x, i32 %y) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp sgt i32 %notx, %y
+ %smax = select i1 %cmp1, i32 %notx, i32 %y
+ %res = xor i32 %smax, -1
+ ret i32 %res
+}
+
+define i32 @test41(i32 %x, i32 %y) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp slt i32 %notx, %y
+ %smin = select i1 %cmp1, i32 %notx, i32 %y
+ %res = xor i32 %smin, -1
+ ret i32 %res
+}
+
+define i32 @test42(i32 %x, i32 %y) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp ugt i32 %notx, %y
+ %umax = select i1 %cmp1, i32 %notx, i32 %y
+ %res = xor i32 %umax, -1
+ ret i32 %res
+}
+
+define i32 @test43(i32 %x, i32 %y) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp ult i32 %notx, %y
+ %umin = select i1 %cmp1, i32 %notx, i32 %y
+ %res = xor i32 %umin, -1
+ ret i32 %res
+}
+
+define i32 @test44(i32 %x, i32 %y) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -4, [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %z = add i32 %y, 3 ; thwart complexity-based canonicalization
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp ult i32 %z, %notx
+ %umin = select i1 %cmp1, i32 %z, i32 %notx
+ %res = xor i32 %umin, -1
+ ret i32 %res
+}
+
+define i32 @test45(i32 %x, i32 %y) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %z = xor i32 %y, -1
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp ult i32 %z, %notx
+ %umin = select i1 %cmp1, i32 %z, i32 %notx
+ %res = xor i32 %umin, -1
+ ret i32 %res
+}
+
+; Check that we work with splat vectors also.
+define <4 x i32> @test46(<4 x i32> %x) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %1 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %2 = icmp sgt <4 x i32> %1, <i32 -256, i32 -256, i32 -256, i32 -256>
+ %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 -256, i32 -256, i32 -256, i32 -256>
+ %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %4
+}
+
+; Test case where the select pattern has more than one use.
+define i32 @test47(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[NOTX]], [[Y:%.*]]
+; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[CMP1]], i32 [[NOTX]], i32 [[Y]]
+; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[UMAX]], -1
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[UMAX]], [[Z:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[ADD]], [[UMIN]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %notx = xor i32 %x, -1
+ %cmp1 = icmp ugt i32 %notx, %y
+ %umax = select i1 %cmp1, i32 %notx, i32 %y
+ %umin = xor i32 %umax, -1
+ %add = add i32 %umax, %z
+ %res = mul i32 %umin, %add
+ ret i32 %res
+}
+
+define i32 @test48(i32 %x) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], -1
+; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 -1
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %a = sub i32 -2, %x
+ %b = icmp sgt i32 %a, 0
+ %c = select i1 %b, i32 %a, i32 0
+ %d = xor i32 %c, -1
+ ret i32 %d
+}
+
+define <2 x i32> @test48vec(<2 x i32> %x) {
+; CHECK-LABEL: @test48vec(
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+ %a = sub <2 x i32> <i32 -2, i32 -2>, %x
+ %b = icmp sgt <2 x i32> %a, zeroinitializer
+ %c = select <2 x i1> %b, <2 x i32> %a, <2 x i32> zeroinitializer
+ %d = xor <2 x i32> %c, <i32 -1, i32 -1>
+ ret <2 x i32> %d
+}
+
+define i32 @test49(i32 %x) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %a = add i32 %x, -2
+ %b = icmp slt i32 %a, -1
+ %c = select i1 %b, i32 %a, i32 -1
+ %d = xor i32 %c, -1
+ ret i32 %d
+}
+
+define <2 x i32> @test49vec(<2 x i32> %x) {
+; CHECK-LABEL: @test49vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[D]]
+;
+ %a = add <2 x i32> %x, <i32 -2, i32 -2>
+ %b = icmp slt <2 x i32> %a, <i32 -1, i32 -1>
+ %c = select <2 x i1> %b, <2 x i32> %a, <2 x i32> <i32 -1, i32 -1>
+ %d = xor <2 x i32> %c, <i32 -1, i32 -1>
+ ret <2 x i32> %d
+}
+
+define i32 @test50(i32 %x, i32 %y) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %a = add i32 %x, -2
+ %b = sub i32 -2, %y
+ %c = icmp slt i32 %a, %b
+ %d = select i1 %c, i32 %a, i32 %b
+ %e = xor i32 %d, -1
+ ret i32 %e
+}
+
+define <2 x i32> @test50vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test50vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %a = add <2 x i32> %x, <i32 -2, i32 -2>
+ %b = sub <2 x i32> <i32 -2, i32 -2>, %y
+ %c = icmp slt <2 x i32> %a, %b
+ %d = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+ %e = xor <2 x i32> %d, <i32 -1, i32 -1>
+ ret <2 x i32> %e
+}
+
+define i32 @test51(i32 %x, i32 %y) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -3, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], -3
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+ %a = add i32 %x, 2
+ %b = sub i32 2, %y
+ %c = icmp sgt i32 %a, %b
+ %d = select i1 %c, i32 %a, i32 %b
+ %e = xor i32 %d, -1
+ ret i32 %e
+}
+
+define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test51vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 -3, i32 -3>, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -3, i32 -3>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+ %a = add <2 x i32> %x, <i32 2, i32 2>
+ %b = sub <2 x i32> <i32 2, i32 2>, %y
+ %c = icmp sgt <2 x i32> %a, %b
+ %d = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+ %e = xor <2 x i32> %d, <i32 -1, i32 -1>
+ ret <2 x i32> %e
+}
diff --git a/llvm/test/Transforms/InstCombine/xor2.ll b/llvm/test/Transforms/InstCombine/xor2.ll
new file mode 100644
index 00000000000..fe969769a02
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/xor2.ll
@@ -0,0 +1,513 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1253
+define i1 @test0(i32 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i32 %A, -2147483648
+ %C = icmp sgt i32 %B, -1
+ ret i1 %C
+}
+
+define <2 x i1> @test0vec(<2 x i32> %A) {
+; CHECK-LABEL: @test0vec(
+; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %B = xor <2 x i32> %A, <i32 -2147483648, i32 -2147483648>
+ %C = icmp sgt <2 x i32> %B, <i32 -1, i32 -1>
+ ret <2 x i1> %C
+}
+
+define i1 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i32 %A, 12345
+ %C = icmp slt i32 %B, 0
+ ret i1 %C
+}
+
+; PR1014
+define i32 @test2(i32 %tmp1) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[OVM:%.*]] = and i32 [[TMP1:%.*]], 32
+; CHECK-NEXT: [[OV1101:%.*]] = or i32 [[OVM]], 8
+; CHECK-NEXT: ret i32 [[OV1101]]
+;
+ %ovm = and i32 %tmp1, 32
+ %ov3 = add i32 %ovm, 145
+ %ov110 = xor i32 %ov3, 153
+ ret i32 %ov110
+}
+
+define i32 @test3(i32 %tmp1) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[OVM:%.*]] = and i32 [[TMP1:%.*]], 32
+; CHECK-NEXT: [[OV1101:%.*]] = or i32 [[OVM]], 8
+; CHECK-NEXT: ret i32 [[OV1101]]
+;
+ %ovm = or i32 %tmp1, 145
+ %ov31 = and i32 %ovm, 177
+ %ov110 = xor i32 %ov31, 153
+ ret i32 %ov110
+}
+
+; defect-2 in rdar://12329730
+; ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+; where the "X" has more than one use
+define i32 @test5(i32 %val1) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[VAL1:%.*]], 1234
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[VAL1]], 8
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[SHR]], 5
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XOR1]], [[XOR]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %xor = xor i32 %val1, 1234
+ %shr = lshr i32 %xor, 8
+ %xor1 = xor i32 %shr, 1
+ %add = add i32 %xor1, %xor
+ ret i32 %add
+}
+
+; defect-1 in rdar://12329730
+; Simplify (X^Y) -> X or Y in the user's context if we know that
+; only bits from X or Y are demanded.
+; e.g. the "x ^ 1234" can be optimized into x in the context of "t >> 16".
+; In other words, t >> 16 -> x >> 16.
+; unsigned foo(unsigned x) { unsigned t = x ^ 1234; return (t >> 16) + t; }
+define i32 @test6(i32 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], 1234
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 16
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHR]], [[XOR]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %xor = xor i32 %x, 1234
+ %shr = lshr i32 %xor, 16
+ %add = add i32 %shr, %xor
+ ret i32 %add
+}
+
+
+; (A | B) ^ (~A) -> (A | ~B)
+define i32 @test7(i32 %a, i32 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %or = or i32 %a, %b
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %or, %neg
+ ret i32 %xor
+}
+
+; (~A) ^ (A | B) -> (A | ~B)
+define i32 @test8(i32 %a, i32 %b) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[B_NOT:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT: [[XOR:%.*]] = or i32 [[B_NOT]], [[A:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %neg = xor i32 %a, -1
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+}
+
+; (A & B) ^ (A ^ B) -> (A | B)
+define i32 @test9(i32 %b, i32 %c) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[XOR2]]
+;
+ %and = and i32 %b, %c
+ %xor = xor i32 %b, %c
+ %xor2 = xor i32 %and, %xor
+ ret i32 %xor2
+}
+
+; (A & B) ^ (B ^ A) -> (A | B)
+define i32 @test9b(i32 %b, i32 %c) {
+; CHECK-LABEL: @test9b(
+; CHECK-NEXT: [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[XOR2]]
+;
+ %and = and i32 %b, %c
+ %xor = xor i32 %c, %b
+ %xor2 = xor i32 %and, %xor
+ ret i32 %xor2
+}
+
+; (A ^ B) ^ (A & B) -> (A | B)
+define i32 @test10(i32 %b, i32 %c) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[XOR2]]
+;
+ %xor = xor i32 %b, %c
+ %and = and i32 %b, %c
+ %xor2 = xor i32 %xor, %and
+ ret i32 %xor2
+}
+
+; (A ^ B) ^ (A & B) -> (A | B)
+define i32 @test10b(i32 %b, i32 %c) {
+; CHECK-LABEL: @test10b(
+; CHECK-NEXT: [[XOR2:%.*]] = or i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[XOR2]]
+;
+ %xor = xor i32 %b, %c
+ %and = and i32 %c, %b
+ %xor2 = xor i32 %xor, %and
+ ret i32 %xor2
+}
+
+define i32 @test11(i32 %A, i32 %B) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
+;
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %and = and i32 %xor1, %xor2
+ ret i32 %and
+}
+
+define i32 @test11b(i32 %A, i32 %B) {
+; CHECK-LABEL: @test11b(
+; CHECK-NEXT: ret i32 0
+;
+ %xor1 = xor i32 %B, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %and = and i32 %xor2, %xor1
+ ret i32 %and
+}
+
+define i32 @test11c(i32 %A, i32 %B) {
+; CHECK-LABEL: @test11c(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %xor1 = xor i32 %A, %B
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %and = and i32 %xor1, %xor2
+ ret i32 %and
+}
+
+define i32 @test11d(i32 %A, i32 %B) {
+; CHECK-LABEL: @test11d(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %xor1 = xor i32 %A, %B
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %not, %B
+ %and = and i32 %xor2, %xor1
+ ret i32 %and
+}
+
+define i32 @test11e(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test11e(
+; CHECK-NEXT: [[FORCE:%.*]] = mul i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[FORCE]], [[NOT]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %force = mul i32 %B, %C
+ %xor1 = xor i32 %force, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %force, %not
+ %and = and i32 %xor1, %xor2
+ ret i32 %and
+}
+
+define i32 @test11f(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test11f(
+; CHECK-NEXT: [[FORCE:%.*]] = mul i32 [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[FORCE]], [[NOT]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: ret i32 [[AND]]
+;
+ %force = mul i32 %B, %C
+ %xor1 = xor i32 %force, %A
+ %not = xor i32 %A, -1
+ %xor2 = xor i32 %force, %not
+ %and = and i32 %xor2, %xor1
+ ret i32 %and
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %and, %nega
+ ret i32 %xor
+}
+
+define i32 @test12commuted(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12commuted(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %negb = xor i32 %b, -1
+ %and = and i32 %negb, %a
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %and, %nega
+ ret i32 %xor
+}
+
+; This is a test of canonicalization via operand complexity.
+; The final xor has a binary operator and a (fake) unary operator,
+; so binary (more complex) should come first.
+
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %nega = xor i32 %a, -1
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %xor = xor i32 %nega, %and
+ ret i32 %xor
+}
+
+define i32 @test13commuted(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13commuted(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %nega = xor i32 %a, -1
+ %negb = xor i32 %b, -1
+ %and = and i32 %negb, %a
+ %xor = xor i32 %nega, %and
+ ret i32 %xor
+}
+
+; (A ^ C) ^ (A | B) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute1(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ %ab = or i32 %a, %b
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+; (C ^ A) ^ (A | B) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute2(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %c, %a
+ %ab = or i32 %a, %b
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+; (A ^ C) ^ (B | A) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute3(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute3(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ %ab = or i32 %b, %a
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+; (C ^ A) ^ (B | A) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute4(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %c, %a
+ %ab = or i32 %b, %a
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+; (A | B) ^ (A ^ C) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute5(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute5(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ %ab = or i32 %a, %b
+ %r = xor i32 %ab, %ac
+ ret i32 %r
+}
+
+; (A | B) ^ (C ^ A) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute6(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute6(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %c, %a
+ %ab = or i32 %a, %b
+ %r = xor i32 %ab, %ac
+ ret i32 %r
+}
+
+; (B | A) ^ (A ^ C) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute7(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute7(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ %ab = or i32 %b, %a
+ %r = xor i32 %ab, %ac
+ ret i32 %r
+}
+
+; (B | A) ^ (C ^ A) -> ((~A) & B) ^ C
+
+define i32 @xor_or_xor_common_op_commute8(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @xor_or_xor_common_op_commute8(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[TMP2]], [[C:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %c, %a
+ %ab = or i32 %b, %a
+ %r = xor i32 %ab, %ac
+ ret i32 %r
+}
+
+define i32 @xor_or_xor_common_op_extra_use1(i32 %a, i32 %b, i32 %c, i32* %p) {
+; CHECK-LABEL: @xor_or_xor_common_op_extra_use1(
+; CHECK-NEXT: [[AC:%.*]] = xor i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: store i32 [[AC]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[AB:%.*]] = or i32 [[A]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[AC]], [[AB]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ store i32 %ac, i32* %p
+ %ab = or i32 %a, %b
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+define i32 @xor_or_xor_common_op_extra_use2(i32 %a, i32 %b, i32 %c, i32* %p) {
+; CHECK-LABEL: @xor_or_xor_common_op_extra_use2(
+; CHECK-NEXT: [[AC:%.*]] = xor i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: [[AB:%.*]] = or i32 [[A]], [[B:%.*]]
+; CHECK-NEXT: store i32 [[AB]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[AC]], [[AB]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ %ab = or i32 %a, %b
+ store i32 %ab, i32* %p
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+define i32 @xor_or_xor_common_op_extra_use3(i32 %a, i32 %b, i32 %c, i32* %p1, i32* %p2) {
+; CHECK-LABEL: @xor_or_xor_common_op_extra_use3(
+; CHECK-NEXT: [[AC:%.*]] = xor i32 [[A:%.*]], [[C:%.*]]
+; CHECK-NEXT: store i32 [[AC]], i32* [[P1:%.*]], align 4
+; CHECK-NEXT: [[AB:%.*]] = or i32 [[A]], [[B:%.*]]
+; CHECK-NEXT: store i32 [[AB]], i32* [[P2:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = xor i32 [[AC]], [[AB]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ac = xor i32 %a, %c
+ store i32 %ac, i32* %p1
+ %ab = or i32 %a, %b
+ store i32 %ab, i32* %p2
+ %r = xor i32 %ac, %ab
+ ret i32 %r
+}
+
+define i8 @test15(i8 %A, i8 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], -34
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %and = and i8 %xor1, %xor2
+ %res = mul i8 %and, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
+
+define i8 @test16(i8 %A, i8 %B) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
+; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], -34
+; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %xor1 = xor i8 %B, %A
+ %not = xor i8 %A, 33
+ %xor2 = xor i8 %not, %B
+ %and = and i8 %xor2, %xor1
+ %res = mul i8 %and, %xor2 ; to increase the use count for the xor
+ ret i8 %res
+}
diff --git a/llvm/test/Transforms/InstCombine/zero-point-zero-add.ll b/llvm/test/Transforms/InstCombine/zero-point-zero-add.ll
new file mode 100644
index 00000000000..a23db75525e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare double @fabs(double) readonly
+
+define double @test(double %X) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[Y:%.*]] = fadd double %X, 0.000000e+00
+; CHECK-NEXT: ret double [[Y]]
+;
+ %Y = fadd double %X, 0.0 ;; Should be a single add x, 0.0
+ %Z = fadd double %Y, 0.0
+ ret double %Z
+}
+
+define double @test1(double %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y:%.*]] = call double @llvm.fabs.f64(double %X)
+; CHECK-NEXT: ret double [[Y]]
+;
+ %Y = call double @fabs(double %X)
+ %Z = fadd double %Y, 0.0
+ ret double %Z
+}
diff --git a/llvm/test/Transforms/InstCombine/zeroext-and-reduce.ll b/llvm/test/Transforms/InstCombine/zeroext-and-reduce.ll
new file mode 100644
index 00000000000..74bb731311b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zeroext-and-reduce.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test1(i8 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 %X, 8
+; CHECK-NEXT: [[Z:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = zext i8 %X to i32
+ %Z = and i32 %Y, 65544
+ ret i32 %Z
+}
+
+
diff --git a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll
new file mode 100644
index 00000000000..86c20697288
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -0,0 +1,404 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; rdar://11748024
+
+define i32 @a(i1 zeroext %x, i1 zeroext %y) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[X:%.*]], i32 2, i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = sub nsw i32 [[SUB]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %conv = zext i1 %x to i32
+ %conv3 = zext i1 %y to i32
+ %conv3.neg = sub i32 0, %conv3
+ %sub = add i32 %conv, 1
+ %add = add i32 %sub, %conv3.neg
+ ret i32 %add
+}
+
+define i32 @PR30273_select(i1 %a, i1 %b) {
+; CHECK-LABEL: @PR30273_select(
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[A:%.*]] to i32
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[A]], i32 2, i32 1
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[B:%.*]], i32 [[SEL1]], i32 [[ZEXT]]
+; CHECK-NEXT: ret i32 [[SEL2]]
+;
+ %zext = zext i1 %a to i32
+ %sel1 = select i1 %a, i32 2, i32 1
+ %sel2 = select i1 %b, i32 %sel1, i32 %zext
+ ret i32 %sel2
+}
+
+define i32 @PR30273_zext_add(i1 %a, i1 %b) {
+; CHECK-LABEL: @PR30273_zext_add(
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[A:%.*]] to i32
+; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[B:%.*]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV3]], [[CONV]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %conv = zext i1 %a to i32
+ %conv3 = zext i1 %b to i32
+ %add = add nuw nsw i32 %conv3, %conv
+ ret i32 %add
+}
+
+define i32 @PR30273_three_bools(i1 %x, i1 %y, i1 %z) {
+; CHECK-LABEL: @PR30273_three_bools(
+; CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[X:%.*]] to i32
+; CHECK-NEXT: [[ADD1:%.*]] = select i1 [[X]], i32 2, i32 1
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[Y:%.*]], i32 [[ADD1]], i32 [[FROMBOOL]]
+; CHECK-NEXT: [[ADD2:%.*]] = zext i1 [[Z:%.*]] to i32
+; CHECK-NEXT: [[SEL2:%.*]] = add nuw nsw i32 [[SEL1]], [[ADD2]]
+; CHECK-NEXT: ret i32 [[SEL2]]
+;
+ %frombool = zext i1 %x to i32
+ %add1 = add nsw i32 %frombool, 1
+ %sel1 = select i1 %y, i32 %add1, i32 %frombool
+ %add2 = add nsw i32 %sel1, 1
+ %sel2 = select i1 %z, i32 %add2, i32 %sel1
+ ret i32 %sel2
+}
+
+define i32 @zext_add_scalar(i1 %x) {
+; CHECK-LABEL: @zext_add_scalar(
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[X:%.*]], i32 43, i32 42
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %zext = zext i1 %x to i32
+ %add = add i32 %zext, 42
+ ret i32 %add
+}
+
+define <2 x i32> @zext_add_vec_splat(<2 x i1> %x) {
+; CHECK-LABEL: @zext_add_vec_splat(
+; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 43, i32 43>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT: ret <2 x i32> [[ADD]]
+;
+ %zext = zext <2 x i1> %x to <2 x i32>
+ %add = add <2 x i32> %zext, <i32 42, i32 42>
+ ret <2 x i32> %add
+}
+
+define <2 x i32> @zext_add_vec(<2 x i1> %x) {
+; CHECK-LABEL: @zext_add_vec(
+; CHECK-NEXT: [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 43, i32 24>, <2 x i32> <i32 42, i32 23>
+; CHECK-NEXT: ret <2 x i32> [[ADD]]
+;
+ %zext = zext <2 x i1> %x to <2 x i32>
+ %add = add <2 x i32> %zext, <i32 42, i32 23>
+ ret <2 x i32> %add
+}
+
+declare void @use(i64)
+
+define i64 @zext_negate(i1 %A) {
+; CHECK-LABEL: @zext_negate(
+; CHECK-NEXT: [[SUB:%.*]] = sext i1 [[A:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = zext i1 %A to i64
+ %sub = sub i64 0, %ext
+ ret i64 %sub
+}
+
+define i64 @zext_negate_extra_use(i1 %A) {
+; CHECK-LABEL: @zext_negate_extra_use(
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[A:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sext i1 [[A]] to i64
+; CHECK-NEXT: call void @use(i64 [[EXT]])
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = zext i1 %A to i64
+ %sub = sub i64 0, %ext
+ call void @use(i64 %ext)
+ ret i64 %sub
+}
+
+define <2 x i64> @zext_negate_vec(<2 x i1> %A) {
+; CHECK-LABEL: @zext_negate_vec(
+; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i1> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = zext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> zeroinitializer, %ext
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @zext_negate_vec_undef_elt(<2 x i1> %A) {
+; CHECK-LABEL: @zext_negate_vec_undef_elt(
+; CHECK-NEXT: [[SUB:%.*]] = sext <2 x i1> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = zext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 0, i64 undef>, %ext
+ ret <2 x i64> %sub
+}
+
+define i64 @zext_sub_const(i1 %A) {
+; CHECK-LABEL: @zext_sub_const(
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A:%.*]], i64 41, i64 42
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = zext i1 %A to i64
+ %sub = sub i64 42, %ext
+ ret i64 %sub
+}
+
+define i64 @zext_sub_const_extra_use(i1 %A) {
+; CHECK-LABEL: @zext_sub_const_extra_use(
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[A:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A]], i64 41, i64 42
+; CHECK-NEXT: call void @use(i64 [[EXT]])
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = zext i1 %A to i64
+ %sub = sub i64 42, %ext
+ call void @use(i64 %ext)
+ ret i64 %sub
+}
+
+define <2 x i64> @zext_sub_const_vec(<2 x i1> %A) {
+; CHECK-LABEL: @zext_sub_const_vec(
+; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 41, i64 2>, <2 x i64> <i64 42, i64 3>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = zext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 42, i64 3>, %ext
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @zext_sub_const_vec_undef_elt(<2 x i1> %A) {
+; CHECK-LABEL: @zext_sub_const_vec_undef_elt(
+; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 41, i64 undef>, <2 x i64> <i64 42, i64 undef>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = zext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 42, i64 undef>, %ext
+ ret <2 x i64> %sub
+}
+
+define i64 @sext_negate(i1 %A) {
+; CHECK-LABEL: @sext_negate(
+; CHECK-NEXT: [[SUB:%.*]] = zext i1 [[A:%.*]] to i64
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = sext i1 %A to i64
+ %sub = sub i64 0, %ext
+ ret i64 %sub
+}
+
+define i64 @sext_negate_extra_use(i1 %A) {
+; CHECK-LABEL: @sext_negate_extra_use(
+; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[A:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = zext i1 [[A]] to i64
+; CHECK-NEXT: call void @use(i64 [[EXT]])
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = sext i1 %A to i64
+ %sub = sub i64 0, %ext
+ call void @use(i64 %ext)
+ ret i64 %sub
+}
+
+define <2 x i64> @sext_negate_vec(<2 x i1> %A) {
+; CHECK-LABEL: @sext_negate_vec(
+; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i1> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = sext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> zeroinitializer, %ext
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @sext_negate_vec_undef_elt(<2 x i1> %A) {
+; CHECK-LABEL: @sext_negate_vec_undef_elt(
+; CHECK-NEXT: [[SUB:%.*]] = zext <2 x i1> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = sext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 0, i64 undef>, %ext
+ ret <2 x i64> %sub
+}
+
+define i64 @sext_sub_const(i1 %A) {
+; CHECK-LABEL: @sext_sub_const(
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A:%.*]], i64 43, i64 42
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = sext i1 %A to i64
+ %sub = sub i64 42, %ext
+ ret i64 %sub
+}
+
+define i64 @sext_sub_const_extra_use(i1 %A) {
+; CHECK-LABEL: @sext_sub_const_extra_use(
+; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[A:%.*]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = select i1 [[A]], i64 43, i64 42
+; CHECK-NEXT: call void @use(i64 [[EXT]])
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %ext = sext i1 %A to i64
+ %sub = sub i64 42, %ext
+ call void @use(i64 %ext)
+ ret i64 %sub
+}
+
+define <2 x i64> @sext_sub_const_vec(<2 x i1> %A) {
+; CHECK-LABEL: @sext_sub_const_vec(
+; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 43, i64 4>, <2 x i64> <i64 42, i64 3>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = sext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 42, i64 3>, %ext
+ ret <2 x i64> %sub
+}
+
+define <2 x i64> @sext_sub_const_vec_undef_elt(<2 x i1> %A) {
+; CHECK-LABEL: @sext_sub_const_vec_undef_elt(
+; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 undef, i64 43>, <2 x i64> <i64 undef, i64 42>
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %ext = sext <2 x i1> %A to <2 x i64>
+ %sub = sub <2 x i64> <i64 undef, i64 42>, %ext
+ ret <2 x i64> %sub
+}
+
+define i8 @sext_sub(i8 %x, i1 %y) {
+; CHECK-LABEL: @sext_sub(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[Y:%.*]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[SUB]]
+;
+ %sext = sext i1 %y to i8
+ %sub = sub i8 %x, %sext
+ ret i8 %sub
+}
+
+; Vectors get the same transform.
+
+define <2 x i8> @sext_sub_vec(<2 x i8> %x, <2 x i1> %y) {
+; CHECK-LABEL: @sext_sub_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[Y:%.*]] to <2 x i8>
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SUB]]
+;
+ %sext = sext <2 x i1> %y to <2 x i8>
+ %sub = sub <2 x i8> %x, %sext
+ ret <2 x i8> %sub
+}
+
+; NSW is preserved.
+
+define <2 x i8> @sext_sub_vec_nsw(<2 x i8> %x, <2 x i1> %y) {
+; CHECK-LABEL: @sext_sub_vec_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[Y:%.*]] to <2 x i8>
+; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret <2 x i8> [[SUB]]
+;
+ %sext = sext <2 x i1> %y to <2 x i8>
+ %sub = sub nsw <2 x i8> %x, %sext
+ ret <2 x i8> %sub
+}
+
+; We favor the canonical zext+add over keeping the NUW.
+
+define i8 @sext_sub_nuw(i8 %x, i1 %y) {
+; CHECK-LABEL: @sext_sub_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[Y:%.*]] to i8
+; CHECK-NEXT: [[SUB:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[SUB]]
+;
+ %sext = sext i1 %y to i8
+ %sub = sub nuw i8 %x, %sext
+ ret i8 %sub
+}
+
+define i32 @sextbool_add(i1 %c, i32 %x) {
+; CHECK-LABEL: @sextbool_add(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[C:%.*]] to i32
+; CHECK-NEXT: [[S:%.*]] = sub i32 [[X:%.*]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %b = sext i1 %c to i32
+ %s = add i32 %b, %x
+ ret i32 %s
+}
+
+define i32 @sextbool_add_commute(i1 %c, i32 %px) {
+; CHECK-LABEL: @sextbool_add_commute(
+; CHECK-NEXT: [[X:%.*]] = urem i32 [[PX:%.*]], 42
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[C:%.*]] to i32
+; CHECK-NEXT: [[S:%.*]] = sub nsw i32 [[X]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %x = urem i32 %px, 42 ; thwart complexity-based canonicalization
+ %b = sext i1 %c to i32
+ %s = add i32 %x, %b
+ ret i32 %s
+}
+
+; Negative test - extra use prevents canonicalization.
+
+declare void @use32(i32)
+
+define i32 @sextbool_add_uses(i1 %c, i32 %x) {
+; CHECK-LABEL: @sextbool_add_uses(
+; CHECK-NEXT: [[B:%.*]] = sext i1 [[C:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[B]])
+; CHECK-NEXT: [[S:%.*]] = add i32 [[B]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %b = sext i1 %c to i32
+ call void @use32(i32 %b)
+ %s = add i32 %b, %x
+ ret i32 %s
+}
+
+define <4 x i32> @sextbool_add_vector(<4 x i1> %c, <4 x i32> %x) {
+; CHECK-LABEL: @sextbool_add_vector(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i1> [[C:%.*]] to <4 x i32>
+; CHECK-NEXT: [[S:%.*]] = sub <4 x i32> [[X:%.*]], [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = sext <4 x i1> %c to <4 x i32>
+ %s = add <4 x i32> %x, %b
+ ret <4 x i32> %s
+}
+
+define i32 @zextbool_sub(i1 %c, i32 %x) {
+; CHECK-LABEL: @zextbool_sub(
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[C:%.*]] to i32
+; CHECK-NEXT: [[S:%.*]] = sub i32 [[B]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %b = zext i1 %c to i32
+ %s = sub i32 %b, %x
+ ret i32 %s
+}
+
+define i32 @zextbool_sub_uses(i1 %c, i32 %x) {
+; CHECK-LABEL: @zextbool_sub_uses(
+; CHECK-NEXT: [[B:%.*]] = zext i1 [[C:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[B]])
+; CHECK-NEXT: [[S:%.*]] = sub i32 [[X:%.*]], [[B]]
+; CHECK-NEXT: ret i32 [[S]]
+;
+ %b = zext i1 %c to i32
+ call void @use32(i32 %b)
+ %s = sub i32 %x, %b
+ ret i32 %s
+}
+
+define <4 x i32> @zextbool_sub_vector(<4 x i1> %c, <4 x i32> %x) {
+; CHECK-LABEL: @zextbool_sub_vector(
+; CHECK-NEXT: [[B:%.*]] = zext <4 x i1> [[C:%.*]] to <4 x i32>
+; CHECK-NEXT: [[S:%.*]] = sub <4 x i32> [[X:%.*]], [[B]]
+; CHECK-NEXT: ret <4 x i32> [[S]]
+;
+ %b = zext <4 x i1> %c to <4 x i32>
+ %s = sub <4 x i32> %x, %b
+ ret <4 x i32> %s
+}
+
diff --git a/llvm/test/Transforms/InstCombine/zext-fold.ll b/llvm/test/Transforms/InstCombine/zext-fold.ll
new file mode 100644
index 00000000000..12e49b3f946
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext-fold.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1570
+
+define i32 @test2(float %X, float %Y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ord float %X, %Y
+; CHECK-NEXT: [[TOBOOLNOT5:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TOBOOLNOT5]]
+;
+ %tmp3 = fcmp uno float %X, %Y
+ %tmp34 = zext i1 %tmp3 to i8
+ %tmp = xor i8 %tmp34, 1
+ %toBoolnot5 = zext i8 %tmp to i32
+ ret i32 %toBoolnot5
+}
+
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
new file mode 100644
index 00000000000..afbe36da3e3
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Remove an icmp by using its operand in the subsequent logic directly.
+
+define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
+ %mask = and i8 %a, 1
+ %toBool1 = icmp eq i8 %mask, 0
+ %toBool2 = icmp eq i8 %b, 0
+ %bothCond = or i1 %toBool1, %toBool2
+ %zext = zext i1 %bothCond to i8
+ ret i8 %zext
+
+; CHECK-LABEL: zext_or_icmp_icmp(
+; CHECK-NEXT: %mask = and i8 %a, 1
+; CHECK-NEXT: %toBool2 = icmp eq i8 %b, 0
+; CHECK-NEXT: %toBool22 = zext i1 %toBool2 to i8
+; CHECK-NEXT: %1 = xor i8 %mask, 1
+; CHECK-NEXT: %zext = or i8 %1, %toBool22
+; CHECK-NEXT: ret i8 %zext
+}
+
+; Here, widening the or from i1 to i32 and removing one of the icmps would
+; widen an undef value (created by the out-of-range shift), increasing the
+; range of valid values for the return, so we can't do it.
+define i32 @dont_widen_undef() {
+entry:
+ br label %block2
+
+block1:
+ br label %block2
+
+block2:
+ %m.011 = phi i32 [ 33, %entry ], [ 0, %block1 ]
+ %cmp.i = icmp ugt i32 %m.011, 1
+ %m.1.op = lshr i32 1, %m.011
+ %sext.mask = and i32 %m.1.op, 65535
+ %cmp115 = icmp ne i32 %sext.mask, 0
+ %cmp1 = or i1 %cmp.i, %cmp115
+ %conv2 = zext i1 %cmp1 to i32
+ ret i32 %conv2
+
+; CHECK-LABEL: dont_widen_undef(
+; CHECK: %m.011 = phi i32 [ 33, %entry ], [ 0, %block1 ]
+; CHECK-NEXT: %cmp.i = icmp ugt i32 %m.011, 1
+; CHECK-NEXT: %m.1.op = lshr i32 1, %m.011
+; CHECK-NEXT: %sext.mask = and i32 %m.1.op, 65535
+; CHECK-NEXT: %cmp115 = icmp ne i32 %sext.mask, 0
+; CHECK-NEXT: %cmp1 = or i1 %cmp.i, %cmp115
+; CHECK-NEXT: %conv2 = zext i1 %cmp1 to i32
+; CHECK-NEXT: ret i32 %conv2
+}
diff --git a/llvm/test/Transforms/InstCombine/zext-phi.ll b/llvm/test/Transforms/InstCombine/zext-phi.ll
new file mode 100644
index 00000000000..5e352415c74
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext-phi.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n8:16:32:64"
+
+; Although i1 is not in the datalayout, we should treat it
+; as a legal type because it is a fundamental type in IR.
+; This means we should shrink the phi (sink the zexts).
+
+define i64 @sink_i1_casts(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @sink_i1_casts(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond1, label %if, label %end
+; CHECK: if:
+; CHECK-NEXT: br label %end
+; CHECK: end:
+; CHECK-NEXT: [[PHI_IN:%.*]] = phi i1 [ %cond1, %entry ], [ %cond2, %if ]
+; CHECK-NEXT: [[PHI:%.*]] = zext i1 [[PHI_IN]] to i64
+; CHECK-NEXT: ret i64 [[PHI]]
+;
+entry:
+ %z1 = zext i1 %cond1 to i64
+ br i1 %cond1, label %if, label %end
+
+if:
+ %z2 = zext i1 %cond2 to i64
+ br label %end
+
+end:
+ %phi = phi i64 [ %z1, %entry ], [ %z2, %if ]
+ ret i64 %phi
+}
+
diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
new file mode 100644
index 00000000000..a53bf6cc028
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @test_sext_zext(i16 %A) {
+; CHECK-LABEL: @test_sext_zext(
+; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = zext i16 %A to i32
+ %c2 = sext i32 %c1 to i64
+ ret i64 %c2
+}
+
+define <2 x i64> @test2(<2 x i1> %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
+; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i1> [[XOR]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ZEXT]]
+;
+ %xor = xor <2 x i1> %A, <i1 true, i1 true>
+ %zext = zext <2 x i1> %xor to <2 x i64>
+ ret <2 x i64> %zext
+}
+
+define <2 x i64> @test3(<2 x i64> %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> %A, <i64 23, i64 42>
+; CHECK-NEXT: ret <2 x i64> [[AND]]
+;
+ %trunc = trunc <2 x i64> %A to <2 x i32>
+ %and = and <2 x i32> %trunc, <i32 23, i32 42>
+ %zext = zext <2 x i32> %and to <2 x i64>
+ ret <2 x i64> %zext
+}
+
+define <2 x i64> @test4(<2 x i64> %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[A:%.*]], <i64 23, i64 42>
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i64> [[AND]], <i64 23, i64 42>
+; CHECK-NEXT: ret <2 x i64> [[XOR]]
+;
+ %trunc = trunc <2 x i64> %A to <2 x i32>
+ %and = and <2 x i32> %trunc, <i32 23, i32 42>
+ %xor = xor <2 x i32> %and, <i32 23, i32 42>
+ %zext = zext <2 x i32> %xor to <2 x i64>
+ ret <2 x i64> %zext
+}
+
+define i64 @fold_xor_zext_sandwich(i1 %a) {
+; CHECK-LABEL: @fold_xor_zext_sandwich(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 %a, true
+; CHECK-NEXT: [[ZEXT2:%.*]] = zext i1 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[ZEXT2]]
+;
+ %zext1 = zext i1 %a to i32
+ %xor = xor i32 %zext1, 1
+ %zext2 = zext i32 %xor to i64
+ ret i64 %zext2
+}
+
+define <2 x i64> @fold_xor_zext_sandwich_vec(<2 x i1> %a) {
+; CHECK-LABEL: @fold_xor_zext_sandwich_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> %a, <i1 true, i1 true>
+; CHECK-NEXT: [[ZEXT2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[ZEXT2]]
+;
+ %zext1 = zext <2 x i1> %a to <2 x i32>
+ %xor = xor <2 x i32> %zext1, <i32 1, i32 1>
+ %zext2 = zext <2 x i32> %xor to <2 x i64>
+ ret <2 x i64> %zext2
+}
+
+; Assert that zexts in and(zext(icmp), zext(icmp)) can be folded.
+
+define i8 @fold_and_zext_icmp(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @fold_and_zext_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i64 %a, %c
+; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT: ret i8 [[TMP4]]
+;
+ %1 = icmp sgt i64 %a, %b
+ %2 = zext i1 %1 to i8
+ %3 = icmp slt i64 %a, %c
+ %4 = zext i1 %3 to i8
+ %5 = and i8 %2, %4
+ ret i8 %5
+}
+
+; Assert that zexts in or(zext(icmp), zext(icmp)) can be folded.
+
+define i8 @fold_or_zext_icmp(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @fold_or_zext_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i64 %a, %c
+; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT: ret i8 [[TMP4]]
+;
+ %1 = icmp sgt i64 %a, %b
+ %2 = zext i1 %1 to i8
+ %3 = icmp slt i64 %a, %c
+ %4 = zext i1 %3 to i8
+ %5 = or i8 %2, %4
+ ret i8 %5
+}
+
+; Assert that zexts in xor(zext(icmp), zext(icmp)) can be folded.
+
+define i8 @fold_xor_zext_icmp(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @fold_xor_zext_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i64 %a, %c
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT: ret i8 [[TMP4]]
+;
+ %1 = icmp sgt i64 %a, %b
+ %2 = zext i1 %1 to i8
+ %3 = icmp slt i64 %a, %c
+ %4 = zext i1 %3 to i8
+ %5 = xor i8 %2, %4
+ ret i8 %5
+}
+
+; Assert that zexts in logic(zext(icmp), zext(icmp)) are also folded across
+; nested logical operators.
+
+define i8 @fold_nested_logic_zext_icmp(i64 %a, i64 %b, i64 %c, i64 %d) {
+; CHECK-LABEL: @fold_nested_logic_zext_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i64 %a, %c
+; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 %a, %d
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i8
+; CHECK-NEXT: ret i8 [[TMP6]]
+;
+ %1 = icmp sgt i64 %a, %b
+ %2 = zext i1 %1 to i8
+ %3 = icmp slt i64 %a, %c
+ %4 = zext i1 %3 to i8
+ %5 = and i8 %2, %4
+ %6 = icmp eq i64 %a, %d
+ %7 = zext i1 %6 to i8
+ %8 = or i8 %5, %7
+ ret i8 %8
+}
+
+; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
+
+define i1024 @sext_zext_apint1(i77 %A) {
+; CHECK-LABEL: @sext_zext_apint1(
+; CHECK-NEXT: [[C2:%.*]] = zext i77 %A to i1024
+; CHECK-NEXT: ret i1024 [[C2]]
+;
+ %c1 = zext i77 %A to i533
+ %c2 = sext i533 %c1 to i1024
+ ret i1024 %c2
+}
+
+; This test is for Integer BitWidth <= 64 && BitWidth % 2 != 0.
+
+define i47 @sext_zext_apint2(i11 %A) {
+; CHECK-LABEL: @sext_zext_apint2(
+; CHECK-NEXT: [[C2:%.*]] = zext i11 %A to i47
+; CHECK-NEXT: ret i47 [[C2]]
+;
+ %c1 = zext i11 %A to i39
+ %c2 = sext i39 %c1 to i47
+ ret i47 %c2
+}
+